{
/* Just some sanity to ensure that the scheduler is set up okay. */
ASSERT(current->domain == IDLE_DOMAIN_ID);
- domain_unpause_by_systemcontroller(current->domain);
raise_softirq(SCHEDULE_SOFTIRQ);
do_softirq();
/* Create initial domain 0. */
printk("About to call do_createdomain()\n");
dom0 = do_createdomain(0, 0);
-printk("About to call init_idle_task()\n");
init_task.domain = &idle0_domain;
init_task.processor = 0;
// init_task.mm = &init_mm;
init_task.domain->arch.mm = &init_mm;
// init_task.thread = INIT_THREAD;
- init_idle_task();
//arch_do_createdomain(current);
#ifdef CLONE_DOMAIN0
{
console_endboot(cmdline && strstr(cmdline, "tty0"));
#endif
- domain_unpause_by_systemcontroller(current->domain);
#ifdef CLONE_DOMAIN0
{
int i;
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/io.h>
-#include <asm/irq.h>
#include <asm/mpspec.h>
#include <mach_apic.h>
#include <mach_mpparse.h>
#define APIC_DIVISOR 1
-static void __setup_APIC_LVTT(unsigned int clocks)
+void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt_value, tmp_value, ver;
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-/*
- * this is done for every CPU from setup_APIC_clocks() below.
- * We setup each local APIC with a zero timeout value for now.
- * Unlike Linux, we don't have to wait for slices etc.
- */
-void setup_APIC_timer(void * data)
+static void __init setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
- __save_flags(flags);
- __sti();
- __setup_APIC_LVTT(0);
- __restore_flags(flags);
+
+ local_irq_save(flags);
+
+ /*
+ * Wait for IRQ0's slice:
+ */
+ wait_timer_tick();
+
+ __setup_APIC_LVTT(clocks);
+
+ local_irq_restore(flags);
}
/*
- * In this function we calibrate APIC bus clocks to the external timer.
- *
- * As a result we have the Bus Speed and CPU speed in Hz.
- *
- * We want to do the calibration only once (for CPU0). CPUs connected by the
- * same APIC bus have the very same bus frequency.
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
*
- * This bit is a bit shoddy since we use the very same periodic timer interrupt
- * we try to eliminate to calibrate the APIC.
+ * We want to do the calibration only once since we
+ * want to have local timer irqs synchronized. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
*/
int __init calibrate_APIC_clock(void)
return result;
}
-/*
- * initialise the APIC timers for all CPUs
- * we start with the first and find out processor frequency and bus speed
- */
-void __init setup_APIC_clocks (void)
+
+static unsigned int calibration_result;
+
+void __init setup_boot_APIC_clock(void)
{
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
- __cli();
- /* calibrate CPU0 for CPU speed and BUS speed */
- bus_freq = calibrate_APIC_clock();
- /* Now set up the timer for real. */
- setup_APIC_timer((void *)bus_freq);
- __sti();
- /* and update all other cpus */
- smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+
+ local_irq_disable();
+
+ calibration_result = calibrate_APIC_clock();
+ /*
+ * Now set up the timer for real.
+ */
+ setup_APIC_timer(calibration_result);
+
+ local_irq_enable();
+}
+
+void __init setup_secondary_APIC_clock(void)
+{
+ setup_APIC_timer(calibration_result);
+}
+
+void __init disable_APIC_timer(void)
+{
+ if (using_apic_timer) {
+ unsigned long v;
+
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ }
+}
+
+void enable_APIC_timer(void)
+{
+ if (using_apic_timer) {
+ unsigned long v;
+
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
+ }
}
#undef APIC_DIVISOR
ack_APIC_irq();
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
smp_processor_id());
}
6: Received illegal vector
7: Illegal register address
*/
- printk("APIC error on CPU%d: %02lx(%02lx)\n",
- smp_processor_id(), v, v1);
+ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+ smp_processor_id(), v , v1);
}
/*
connect_bsp_APIC();
-#ifdef CONFIG_SMP
- cpu_online_map = 1;
-#endif
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
- apic_write_around(APIC_ID, boot_cpu_physical_apicid);
setup_local_APIC();
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config)
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
#endif
- setup_APIC_clocks();
+ setup_boot_APIC_clock();
return 0;
}
#include <xen/lib.h>
#include <asm/uaccess.h>
#include <xen/serial.h>
-#include <asm/irq.h>
+#include <xen/irq.h>
#include <xen/spinlock.h>
#include <asm/debugger.h>
#include <xen/init.h>
{
dom0_physinfo_t *pi = &op->u.physinfo;
- pi->ht_per_core = opt_noht ? 1 : ht_per_core;
- pi->cores = smp_num_cpus / pi->ht_per_core;
+ pi->ht_per_core = ht_per_core;
+ pi->cores = num_online_cpus() / ht_per_core;
pi->total_pages = max_page;
pi->free_pages = avail_domheap_pages();
pi->cpu_khz = cpu_khz;
void idle_loop(void)
{
int cpu = smp_processor_id();
+
for ( ; ; )
{
irq_stat[cpu].idle_timestamp = jiffies;
+
while ( !softirq_pending(cpu) )
{
page_scrub_schedule_work();
default_idle();
}
+
do_softirq();
}
}
-static void __startup_cpu_idle_loop(struct exec_domain *ed)
-{
- /* Signal to boot CPU that we are done. */
- init_idle();
-
- /* Start normal idle loop. */
- ed->arch.schedule_tail = continue_idle_task;
- continue_idle_task(ed);
-}
-
void startup_cpu_idle_loop(void)
{
struct exec_domain *ed = current;
- /* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(ed->domain->domain_id == IDLE_DOMAIN_ID);
+ ASSERT(is_idle_task(ed->domain));
percpu_ctxt[smp_processor_id()].curr_ed = ed;
set_bit(smp_processor_id(), &ed->domain->cpuset);
- domain_unpause_by_systemcontroller(ed->domain);
-
- ed->arch.schedule_tail = __startup_cpu_idle_loop;
- raise_softirq(SCHEDULE_SOFTIRQ);
- do_softirq();
+ ed->arch.schedule_tail = continue_idle_task;
- /* End up in __startup_cpu_idle_loop, not here. */
- BUG();
+ idle_loop();
}
static long no_idt[2];
ed->arch.flags = TF_kernel_mode;
- if ( d->domain_id == IDLE_DOMAIN_ID )
+ if ( is_idle_task(d) )
return;
ed->arch.schedule_tail = continue_nonidle_task;
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
- d->shared_info->n_vcpu = smp_num_cpus;
+ d->shared_info->n_vcpu = num_online_cpus();
/* Set up monitor table */
update_pagetables(ed);
pin = (address - 0x10) >> 1;
- rte.dest.logical.logical_dest = target_cpus();
+ rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
*(int *)&rte = val;
if ( rte.vector >= FIRST_DEVICE_VECTOR )
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
+ cpumask_t cpumask = CPU_MASK_NONE;
if ( !IS_CAPABLE_PHYSDEV(d) )
return -EPERM;
desc->handler->startup(irq);
/* Attempt to bind the interrupt target to the correct CPU. */
+ cpu_set(ed->processor, cpumask);
if ( desc->handler->set_affinity != NULL )
- desc->handler->set_affinity(
- irq, apicid_to_phys_cpu_present(ed->processor));
+ desc->handler->set_affinity(irq, cpumask);
}
else if ( !will_share || !action->shareable )
{
#define up(_m) spin_unlock(_m)
#define vmalloc(_s) xmalloc_bytes(_s)
#define vfree(_p) xfree(_p)
-#define num_online_cpus() smp_num_cpus
#if 0
MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
#define down(_m) spin_lock(_m)
#define up(_m) spin_unlock(_m)
-#define num_booting_cpus() smp_num_cpus
-
u32 num_var_ranges = 0;
unsigned int *usage_table;
printk("Testing NMI watchdog --- ");
- for ( cpu = 0; cpu < smp_num_cpus; cpu++ )
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
prev_nmi_count[cpu] = nmi_count(cpu);
- __sti();
+ local_irq_enable();
mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
- for ( cpu = 0; cpu < smp_num_cpus; cpu++ )
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
{
+ if ( !cpu_isset(cpu, cpu_callin_map) &&
+ !cpu_isset(cpu, cpu_online_map) )
+ continue;
if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
printk("CPU#%d stuck. ", cpu);
else
spin_unlock_irqrestore(&watchdog_lock, flags);
}
-void touch_nmi_watchdog (void)
-{
- int i;
- for (i = 0; i < smp_num_cpus; i++)
- alert_counter[i] = 0;
-}
-
void nmi_watchdog_tick (struct cpu_user_regs * regs)
{
int sum, cpu = smp_processor_id();
int opt_noht = 0;
boolean_param("noht", opt_noht);
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
+
/* opt_watchdog: If true, run a watchdog NMI on each processor. */
static int opt_watchdog = 0;
boolean_param("watchdog", opt_watchdog);
int early_boot = 1;
+int ht_per_core = 1;
+cpumask_t cpu_present_map;
+
/* Limits of Xen heap, used to initialise the allocator. */
unsigned long xenheap_phys_start, xenheap_phys_end;
extern void time_init(void);
extern void ac_timer_init(void);
extern void initialize_keytable();
-extern int do_timer_lists_from_pit;
extern unsigned long cpu0_stack[];
#endif
EXPORT_SYMBOL(mmu_cr4_features);
-unsigned long wait_init_idle;
-
struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain };
int acpi_disabled;
-int phys_proc_id[NR_CPUS];
int logical_proc_id[NR_CPUS];
/* Standard macro to see if a specific flag is changeable. */
if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
clear_bit(X86_FEATURE_SEP, &c->x86_capability);
-#ifdef CONFIG_SMP
if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
{
u32 eax, ebx, ecx, edx;
int initial_apic_id, siblings, cpu = smp_processor_id();
-
+
cpuid(1, &eax, &ebx, &ecx, &edx);
ht_per_core = siblings = (ebx & 0xff0000) >> 16;
cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
}
}
-#endif
#ifdef CONFIG_VMX
start_vmx();
}
}
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+ printk("booted.\n");
+}
unsigned long cpu_initialized;
void __init cpu_init(void)
/* Install correct page table. */
write_ptbase(current);
-
- init_idle_task();
}
int acpi_force;
static void __init start_of_day(void)
{
+ int i;
+
/* Unmap the first page of CPU0's stack. */
memguard_guard_stack(cpu0_stack);
init_apic_mappings();
- scheduler_init();
-
init_IRQ();
trap_init();
arch_init_memory();
- smp_boot_cpus();
+ scheduler_init();
+
+ if ( opt_nosmp )
+ max_cpus = 0;
+ smp_prepare_cpus(max_cpus);
- __sti();
+ /* We aren't hotplug-capable yet. */
+ BUG_ON(!cpus_empty(cpu_present_map));
+ for_each_cpu ( i )
+ cpu_set(i, cpu_present_map);
initialize_keytable();
serial_init_stage2();
- if ( !cpu_has_apic )
+ ac_timer_init();
+
+ init_xen_time();
+
+ for_each_present_cpu ( i )
{
- do_timer_lists_from_pit = 1;
- if ( smp_num_cpus != 1 )
- panic("We need local APICs on SMP machines!");
+ if ( num_online_cpus() >= max_cpus )
+ break;
+ if ( !cpu_online(i) )
+ __cpu_up(i);
}
- ac_timer_init(); /* init accurate timers */
- init_xen_time(); /* initialise the time */
- schedulers_start(); /* start scheduler for each CPU */
-
- check_nmi_watchdog();
+ printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ smp_cpus_done(max_cpus);
do_initcalls();
- wait_init_idle = cpu_online_map;
- clear_bit(smp_processor_id(), &wait_init_idle);
- smp_threads_ready = 1;
- smp_commence(); /* Tell other CPUs that state of the world is stable. */
- while ( wait_init_idle != 0 )
- cpu_relax();
+ schedulers_start();
watchdog_enable();
-
-#ifdef CONFIG_X86_64 /* x86_32 uses low mappings when building DOM0. */
- zap_low_mappings();
-#endif
}
#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
set_current(&idle0_exec_domain);
set_processor_id(0);
+ smp_prepare_boot_cpu();
+
/* We initialise the serial devices very early so we can get debugging. */
serial_init_stage1();
/* Hide UART from DOM0 if we're using it */
serial_endboot();
- domain_unpause_by_systemcontroller(current->domain);
domain_unpause_by_systemcontroller(dom0);
+
startup_cpu_idle_loop();
}
// page table page needs to be vcpu private).
//
#if 0 // this should be enabled for SMP guests...
- flush_tlb_mask(((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id()));
+ flush_tlb_mask(((1<<num_online_cpus()) - 1) & ~(1<<smp_processor_id()));
#endif
need_flush = 1;
* If there are no other CPUs in the system then we get an APIC send error
* if we try to broadcast. thus we have to avoid sending IPIs in this case.
*/
- if ( smp_num_cpus <= 1 )
+ if ( num_online_cpus() <= 1 )
return;
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
ASSERT(local_irq_is_enabled());
/* Flush everyone else. We definitely flushed just before entry. */
- if ( smp_num_cpus > 1 )
+ if ( num_online_cpus() > 1 )
{
spin_lock(&flush_lock);
- flush_cpumask = (1UL << smp_num_cpus) - 1;
+ flush_cpumask = (1UL << num_online_cpus()) - 1;
flush_cpumask &= ~(1UL << smp_processor_id());
flush_va = FLUSHVA_ALL;
send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
ASSERT(local_irq_is_enabled());
- cpuset = ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+ cpuset = ((1UL << num_online_cpus()) - 1) & ~(1UL << smp_processor_id());
if ( cpuset == 0 )
return 0;
{
/* Stop all other CPUs in the system. */
smp_call_function(stop_this_cpu, NULL, 1, 0);
- smp_num_cpus = 1;
local_irq_disable();
disable_local_APIC();
* Fixes
* Felix Koop : NR_CPUS used properly
* Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Alan Cox : By repeated request 8) - Total BogoMIPS report.
* Greg Wright : Fix for kernel stacks panic.
* Erich Boleyn : MP v1.4 and additional changes.
* Matthias Sattler : Changes for 2.1 kernel map.
* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
* Martin J. Bligh : Added support for multi-quad systems
- */
+ * Dave Jones : Report invalid combinations of Athlon CPUs.
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process.
+ */
#include <xen/config.h>
#include <xen/init.h>
-#include <xen/irq.h>
+#include <xen/kernel.h>
#include <xen/mm.h>
-#include <xen/slab.h>
-#include <asm/flushtlb.h>
-#include <asm/mc146818rtc.h>
-#include <asm/smpboot.h>
-#include <xen/smp.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
#include <xen/sched.h>
+#include <xen/irq.h>
#include <xen/delay.h>
-#include <xen/lib.h>
+#include <asm/mc146818rtc.h>
+#include <asm/desc.h>
+#include <asm/div64.h>
+#include <asm/msr.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
-/* opt_nosmp: If true, secondary processors are ignored. */
-static int opt_nosmp = 0;
-boolean_param("nosmp", opt_nosmp);
-
-/* maxcpus: maximum number of CPUs to activate. */
-static int max_cpus = -1;
-integer_param("maxcpus", max_cpus);
+static int _foo;
+#define set_kernel_exec(x,y) (_foo=0)
+#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
+int tainted;
+#define TAINT_UNSAFE_SMP 0
-/* Total count of live CPUs */
-int smp_num_cpus = 1;
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
-/* Number of hyperthreads per core */
-int ht_per_core = 1;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
-/* Bitmask of currently online CPUs */
+/* bitmap of online cpus */
cpumask_t cpu_online_map;
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS];
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-/* Set when the idlers are all forked */
-int smp_threads_ready;
+u8 x86_cpu_to_apicid[NR_CPUS] =
+ { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
/*
* Trampoline 80x86 program as an array.
extern unsigned char trampoline_data [];
extern unsigned char trampoline_end [];
static unsigned char *trampoline_base;
+static int trampoline_exec;
/*
* Currently trivial. Write the real->protected mode
static unsigned long __init setup_trampoline(void)
{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
}
/*
*/
void __init smp_alloc_memory(void)
{
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- trampoline_base = __va(0x90000);
+ trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
+ BUG();
+ /*
+ * Make the SMP trampoline executable:
+ */
+ trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
/*
* a given CPU
*/
-void __init smp_store_cpu_info(int id)
+static void __init smp_store_cpu_info(int id)
{
- cpu_data[id] = boot_cpu_data;
- if (id != 0)
- identify_cpu(&cpu_data[id]);
-}
-
-/*
- * Architecture specific routine called by the kernel just before init is
- * fired off. This allows the BP to have everything in order [we hope].
- * At the end of this all the APs will hit the system scheduling and off
- * we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
- * and give them jobs to do. smp_callin is a standard routine
- * we use to track CPUs as they power up.
- */
-
-static atomic_t smp_commenced = ATOMIC_INIT(0);
-
-void __init smp_commence(void)
-{
- /*
- * Lets the callins below out of their loop.
- */
- Dprintk("Setting commenced=1, go go go\n");
-
- wmb();
- atomic_set(&smp_commenced,1);
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ if (id!=0)
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model==7) && (c->x86_mask==0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+ * It's worth noting that the A5 stepping (662) of some Athlon XP's
+ * have the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+ */
+ if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+ ((c->x86_model==7) && (c->x86_mask>=1)) ||
+ (c->x86_model> 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, it's not a certified SMP capable AMD system. */
+ tainted |= TAINT_UNSAFE_SMP;
+ }
+
+valid_k7:
+ ;
}
/*
* TSC synchronization.
*
- * We first check wether all CPUs have their TSC's synchronized,
+ * We first check whether all CPUs have their TSC's synchronized,
* then we print a warning if not, and always resync.
*/
#define NR_LOOPS 5
-/*
- * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
- * multiplication. Not terribly optimized but we need it at boot time only
- * anyway.
- *
- * result == a / b
- * == (a1 + a2*(2^32)) / b
- * == a1/b + a2*(2^32/b)
- * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
- * ^---- (this multiplication can overflow)
- */
-
-static unsigned long long div64 (unsigned long long a, unsigned long b0)
-{
- unsigned int a1, a2;
- unsigned long long res;
-
- a1 = ((unsigned int*)&a)[0];
- a2 = ((unsigned int*)&a)[1];
-
- res = a1/b0 +
- (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
- a2 / b0 +
- (a2 * (0xffffffff % b0)) / b0;
-
- return res;
-}
-
static void __init synchronize_tsc_bp (void)
{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- int buggy = 0;
-
- printk("checking TSC synchronization across CPUs: ");
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- t0 = tsc_values[i];
- sum += t0;
- }
- avg = div64(sum, smp_num_cpus);
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*ticks_per_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = div64(delta, ticks_per_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
- i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ unsigned long one_usec;
+ int buggy = 0;
+
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+ /* convert from kcyc/sec to cyc/usec */
+ one_usec = cpu_khz / 1000;
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_isset(i, cpu_callout_map)) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ }
+ avg = sum;
+ do_div(avg, num_booting_cpus());
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = delta;
+ do_div(realdelta, one_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
}
static void __init synchronize_tsc_ap (void)
{
- int i;
-
- /*
- * smp_num_cpus is not necessarily known at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
- }
+ int i;
+
+ /*
+ * Not every cpu is online at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != num_booting_cpus())
+ mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
}
#undef NR_LOOPS
+extern void calibrate_delay(void);
+
static atomic_t init_deasserted;
void __init smp_callin(void)
{
- int cpuid, phys_id, i;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted));
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (test_and_set_bit(cpuid, &cpu_online_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- for ( i = 0; i < 200; i++ )
- {
- if ( test_bit(cpuid, &cpu_callout_map) ) break;
- mdelay(10);
- }
-
- if (!test_bit(cpuid, &cpu_callout_map)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
-
- setup_local_APIC();
-
- __sti();
-
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- /*
- * Allow the master to continue.
- */
- set_bit(cpuid, &cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- synchronize_tsc_ap();
+ int cpuid, phys_id, i;
+
+ /*
+	 * If woken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ wait_for_init_deassert(&init_deasserted);
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (cpu_isset(cpuid, cpu_callin_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second, this overestimates the time the
+ * boot CPU is spending to send the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup
+ */
+ for (i = 0; i < 200; i++) {
+ /*
+		 * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (cpu_isset(cpuid, cpu_callout_map))
+ break;
+ rep_nop();
+ mdelay(10);
+ }
+
+ if (!cpu_isset(cpuid, cpu_callout_map)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ smp_callin_clear_local_apic();
+ setup_local_APIC();
+ map_cpu_to_logical_apicid();
+
+#if 0
+ /*
+ * Get our bogomips.
+ */
+ calibrate_delay();
+ Dprintk("Stack at about %p\n",&cpuid);
+#endif
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ disable_APIC_timer();
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ if (cpu_has_tsc && cpu_khz)
+ synchronize_tsc_ap();
}
-static int cpucount;
+int cpucount;
-#ifdef __i386__
+#ifdef CONFIG_X86_32
static void construct_percpu_idt(unsigned int cpu)
{
- unsigned char idt_load[10];
+ unsigned char idt_load[10];
- idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
- memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
+ idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
- *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
- *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
- __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
}
#endif
/*
* Activate a secondary processor.
*/
-void __init start_secondary(void)
+void __init start_secondary(void *unused)
{
- unsigned int cpu = cpucount;
-
- extern void percpu_traps_init(void);
- extern void cpu_init(void);
-
- set_current(idle_task[cpu]);
- set_processor_id(cpu);
+ unsigned int cpu = cpucount;
- percpu_traps_init();
+ extern void percpu_traps_init(void);
+ extern void cpu_init(void);
- cpu_init();
+ set_current(idle_task[cpu]);
+ set_processor_id(cpu);
- smp_callin();
+ percpu_traps_init();
- while (!atomic_read(&smp_commenced))
- cpu_relax();
+ cpu_init();
+ smp_callin();
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
-#ifdef __i386__
- /*
- * At this point, boot CPU has fully initialised the IDT. It is
- * now safe to make ourselves a private copy.
- */
- construct_percpu_idt(cpu);
+#ifdef CONFIG_X86_32
+ /*
+ * At this point, boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ construct_percpu_idt(cpu);
#endif
- local_flush_tlb();
+ setup_secondary_APIC_clock();
+ enable_APIC_timer();
- startup_cpu_idle_loop();
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
- BUG();
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+
+ wmb();
+ startup_cpu_idle_loop();
}
extern struct {
- unsigned long esp, ss;
+ void * esp;
+ unsigned short ss;
} stack_start;
-/* which physical APIC ID maps to which logical CPU number */
-volatile int physical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which physical APIC ID */
-volatile int cpu_2_physical_apicid[NR_CPUS];
+#ifdef CONFIG_NUMA
-/* which logical APIC ID maps to which logical CPU number */
-volatile int logical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which logical APIC ID */
-volatile int cpu_2_logical_apicid[NR_CPUS];
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+ { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
-static inline void init_cpu_to_apicid(void)
-/* Initialize all maps between cpu number and apicids */
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
{
- int apicid, cpu;
-
- for (apicid = 0; apicid < MAX_APICID; apicid++) {
- physical_apicid_2_cpu[apicid] = -1;
- logical_apicid_2_cpu[apicid] = -1;
- }
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpu_2_physical_apicid[cpu] = -1;
- cpu_2_logical_apicid[cpu] = -1;
- }
+ printk("Mapping cpu %d to node %d\n", cpu, node);
+ cpu_set(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = node;
}
-static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
{
- physical_apicid_2_cpu[apicid] = cpu;
- cpu_2_physical_apicid[cpu] = apicid;
+ int node;
+
+ printk("Unmapping cpu %d from all nodes\n", cpu);
+ for (node = 0; node < MAX_NUMNODES; node ++)
+ cpu_clear(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = 0;
}
+#else /* !CONFIG_NUMA */
-static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+#define map_cpu_to_node(cpu, node) ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void map_cpu_to_logical_apicid(void)
+{
+ int cpu = smp_processor_id();
+ int apicid = logical_smp_processor_id();
+
+ cpu_2_logical_apicid[cpu] = apicid;
+ map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
{
- physical_apicid_2_cpu[apicid] = -1;
- cpu_2_physical_apicid[cpu] = -1;
+ cpu_2_logical_apicid[cpu] = BAD_APICID;
+ unmap_cpu_to_node(cpu);
}
#if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
+static inline void __inquire_remote_apic(int apicid)
{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
}
#endif
+#ifdef WAKE_SECONDARY_VIA_NMI
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc. later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int timeout, maxlvt;
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ Dprintk("NMI sent.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
-static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
-
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
}
+#endif /* WAKE_SECONDARY_VIA_INIT */
-extern unsigned long cpu_initialized;
+extern cpumask_t cpu_initialized;
-static void __init do_boot_cpu (int apicid)
+static int __init do_boot_cpu(int apicid)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
*/
{
- struct domain *idle;
- struct exec_domain *ed;
- unsigned long boot_error = 0;
- int timeout, cpu;
- unsigned long start_eip;
- void *stack;
-
- cpu = ++cpucount;
+ struct domain *idle;
+ struct exec_domain *ed;
+ void *stack;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_eip;
+ unsigned short nmi_high = 0, nmi_low = 0;
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
+ cpu = ++cpucount;
- ed = idle->exec_domain[0];
+ if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+ panic("failed 'createdomain' for CPU %d", cpu);
- set_bit(_DOMF_idle_domain, &idle->domain_flags);
+ ed = idle_task[cpu] = idle->exec_domain[0];
- ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+ set_bit(_DOMF_idle_domain, &idle->domain_flags);
- map_cpu_to_boot_apicid(cpu, apicid);
+ ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
- idle_task[cpu] = ed;
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- /* So we see what's up. */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-
- stack = (void *)alloc_xenheap_pages(STACK_ORDER);
+ stack = (void *)alloc_xenheap_pages(STACK_ORDER);
#if defined(__i386__)
- stack_start.esp = __pa(stack);
+ stack_start.esp = (void *)__pa(stack);
#elif defined(__x86_64__)
- stack_start.esp = (unsigned long)stack;
+ stack_start.esp = stack;
#endif
- stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
-
- /* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_stack(stack);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if ( APIC_INTEGRATED(apic_version[apicid]) )
- {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
-
- boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- set_bit(cpu, &cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (test_bit(cpu, &cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (test_bit(cpu, &cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- printk("CPU%d has booted.\n", cpu);
- } else {
- boot_error= 1;
- if (*((volatile unsigned int *)phys_to_virt(start_eip))
- == 0xA5A5A5A5)
+ stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+
+ /* Debug build: detect stack overflow by setting up a guard page. */
+ memguard_guard_stack(stack);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ store_NMI_vector(&nmi_high, &nmi_low);
+
+ smpboot_setup_warm_reset_vector(start_eip);
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)trampoline_base)
+ == 0xA5)
/* trampoline started but...? */
- printk("Stuck ??\n");
- else
+ printk("Stuck ??\n");
+ else
/* trampoline code not run */
- printk("Not responding.\n");
-#if APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_boot_apicid(cpu, apicid);
- clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
- cpucount--;
- }
+ printk("Not responding.\n");
+ inquire_remote_apic(apicid);
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)trampoline_base) = 0;
+
+ return boot_error;
}
+#if 0
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+ unsigned long cachesize; /* kB */
+ unsigned long bandwidth = 350; /* MB/s */
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+
+ if (!cpu_khz) {
+ /*
+ * this basically disables processor-affinity
+ * scheduling on SMP without a TSC.
+ */
+ cacheflush_time = 0;
+ return;
+ } else {
+ cachesize = boot_cpu_data.x86_cache_size;
+ if (cachesize == -1) {
+ cachesize = 16; /* Pentiums, 2x8kB cache */
+ bandwidth = 100;
+ }
+
+ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ }
+
+ cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ (long)cacheflush_time/(cpu_khz/1000),
+ ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+ printk("task migration cache decay timeout: %ld msecs.\n",
+ cache_decay_ticks);
+}
+#else
+#define smp_tune_scheduling() ((void)0)
+#endif
/*
* Cycle through the processors sending APIC IPIs to boot each.
static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio = NULL;
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-void __init smp_boot_cpus(void)
+static void __init smp_boot_cpus(unsigned int max_cpus)
{
- int apicid, bit;
-
- /* Initialize the logical to physical CPU number mapping */
- init_cpu_to_apicid();
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d booted\n", 0);
-
- /*
- * We have the boot CPU online for sure.
- */
- set_bit(0, &cpu_online_map);
- boot_cpu_logical_apicid = logical_smp_processor_id();
- map_cpu_to_boot_apicid(0, boot_cpu_apicid);
-
- /*
- * If we couldnt find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config || opt_nosmp) {
- io_apic_irqs = 0;
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_online_map = 1;
- smp_num_cpus = 1;
- if (APIC_init_uniprocessor())
- printk("Local APIC not detected."
- " Using dummy APIC emulation.\n");
- goto smp_done;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
- printk("BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- io_apic_irqs = 0;
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_online_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- verify_local_APIC();
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- smp_found_config = 0;
- printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- io_apic_irqs = 0;
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_online_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- connect_bsp_APIC();
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
- BUG();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
-
- for (bit = 0; bit < NR_CPUS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if (apicid == boot_cpu_apicid)
- continue;
-
- /*
- * Don't start hyperthreads if option noht requested.
- */
- if (opt_noht && (apicid & (ht_per_core - 1)))
- continue;
-
- if (!check_apicid_present(bit))
- continue;
- if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
- continue;
-
- do_boot_cpu(apicid);
-
- /*
- * Make sure we unmap all failed CPUs
- */
- if ((boot_apicid_to_cpu(apicid) == -1) &&
- (!check_apicid_present(bit)))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
- }
-
- /*
- * Cleanup possible dangling ends...
- */
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-
- if (!cpucount) {
- printk("Error: only one processor found.\n");
- } else {
- printk("Total of %d processors activated.\n", cpucount+1);
- }
- smp_num_cpus = cpucount + 1;
-
- Dprintk("Boot done.\n");
-
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if ( nr_ioapics ) setup_IO_APIC();
-
- /* Set up all local APIC timers in the system. */
- {
- extern void setup_APIC_clocks(void);
- setup_APIC_clocks();
- }
-
- /* Synchronize the TSC with the AP(s). */
- if ( cpucount ) synchronize_tsc_bp();
-
- smp_done:
- ;
+ int apicid, cpu, bit, kicked;
+#ifdef BOGOMIPS
+ unsigned long bogosum = 0;
+#endif
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d: ", 0);
+ print_cpu_info(&cpu_data[0]);
+
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+
+ /*current_thread_info()->cpu = 0;*/
+ smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config && !acpi_lapic) {
+ printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ smpboot_clear_io_apic_irqs();
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ if (APIC_init_uniprocessor())
+ printk(KERN_NOTICE "Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ map_cpu_to_logical_apicid();
+ return;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ * Makes no sense to do this check in clustered apic mode, so skip it.
+ */
+ if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ smpboot_clear_io_apic_irqs();
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ return;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ smp_found_config = 0;
+ printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ smpboot_clear_io_apic_irqs();
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ return;
+ }
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+ map_cpu_to_logical_apicid();
+
+
+ setup_portio_remap();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+
+ kicked = 1;
+ for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
+ continue;
+
+ if (!check_apicid_present(bit))
+ continue;
+ if (max_cpus <= cpucount+1)
+ continue;
+
+ if (do_boot_cpu(apicid))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+ else
+ ++kicked;
+ }
+
+ /*
+ * Cleanup possible dangling ends...
+ */
+ smpboot_restore_warm_reset_vector();
+
+#ifdef BOGOMIPS
+ /*
+ * Allow the user to impress friends.
+ */
+ Dprintk("Before bogomips.\n");
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data[cpu].loops_per_jiffy;
+ printk(KERN_INFO
+ "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+#else
+ printk("Total of %d processors activated.\n", cpucount+1);
+#endif
+
+ Dprintk("Before bogocount - setting activated=1.\n");
+
+ if (smp_b_stepping)
+ printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ if (tainted & TAINT_UNSAFE_SMP) {
+ if (cpucount)
+ printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+ else
+ tainted &= ~TAINT_UNSAFE_SMP;
+ }
+
+ Dprintk("Boot done.\n");
+
+ /*
+ * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * efficiently.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (siblings != smp_num_siblings)
+ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+ }
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
+
+ smpboot_setup_io_apic();
+
+ setup_boot_APIC_clock();
+
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpu_has_tsc && cpucount && cpu_khz)
+ synchronize_tsc_bp();
}
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ cpu_set(smp_processor_id(), cpu_online_map);
+ cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* This only works at boot for x86. See "rewrite" above. */
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ local_irq_enable();
+ return -ENOSYS;
+ }
+
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ local_irq_enable();
+ return -EIO;
+ }
+
+ local_irq_enable();
+ /* Unleash the CPU! */
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+#ifdef CONFIG_X86_64
+ zap_low_mappings();
+#endif
+ /*
+ * Disable executability of the SMP trampoline:
+ */
+ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+}
+
+#if 0
+void __init smp_intr_init(void)
+{
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
+#endif
unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
int timer_ack = 0;
-int do_timer_lists_from_pit = 0;
unsigned long volatile jiffies;
/* PRIVATE */
write_unlock_irq(&time_lock);
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
- if ( do_timer_lists_from_pit )
+ if ( !cpu_has_apic )
raise_softirq(AC_TIMER_SOFTIRQ);
}
static inline int kernel_text_address(unsigned long addr)
{
+ extern char _stext, _etext;
if (addr >= (unsigned long) &_stext &&
addr <= (unsigned long) &_etext)
return 1;
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
+#include <xen/irq.h>
#include <xen/softirq.h>
#include <asm/current.h>
#include <asm/io.h>
-#include <asm/irq.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
extern long do_block(void);
void do_nmi(struct cpu_user_regs *, unsigned long);
-int start_vmx()
+int start_vmx(void)
{
struct vmcs_struct *vmcs;
u32 ecx;
if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
printk("VMX disabled by Feature Control MSR.\n");
- return 0;
+ return 0;
}
}
- else
+ else {
wrmsr(IA32_FEATURE_CONTROL_MSR,
- IA32_FEATURE_CONTROL_MSR_LOCK | IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+ IA32_FEATURE_CONTROL_MSR_LOCK |
+ IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+ }
set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
return 1;
}
-void stop_vmx()
+void stop_vmx(void)
{
if (read_cr4() & X86_CR4_VMXE)
__vmxoff();
return result;
}
-static void vmx_do_no_device_fault()
+static void vmx_do_no_device_fault(void)
{
unsigned long cr0;
struct ac_timer **new_heap = xmalloc_array(struct ac_timer *, limit);
if ( new_heap == NULL ) BUG();
memcpy(new_heap, heap, (limit>>1)*sizeof(struct ac_timer *));
- for ( i = 0; i < smp_num_cpus; i++ )
+ for ( i = 0; i < NR_CPUS; i++ )
if ( ac_timers[i].heap == heap )
ac_timers[i].heap = new_heap;
xfree(heap);
printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
(u32)(now>>32), (u32)now);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu( i )
{
printk("CPU[%02d] ", i);
spin_lock_irqsave(&ac_timers[i].lock, flags);
open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for ( i = 0; i < NR_CPUS; i++ )
{
ac_timers[i].heap = xmalloc_array(
struct ac_timer *, DEFAULT_HEAP_LIMIT+1);
unsigned int pro;
domid_t dom;
struct exec_domain *ed;
- unsigned int i, ht, cnt[NR_CPUS] = { 0 };
+ unsigned int i, cnt[NR_CPUS] = { 0 };
dom = op->u.createdomain.domain;
* domains will all share the second HT of each CPU. Since dom0 is on
* CPU 0, we favour high numbered CPUs in the event of a tie.
*/
- ht = opt_noht ? 1 : ht_per_core;
- pro = ht-1;
- for ( i = pro; i < smp_num_cpus; i += ht )
+ pro = ht_per_core - 1;
+ for ( i = pro; i < num_online_cpus(); i += ht_per_core )
if ( cnt[i] <= cnt[pro] )
pro = i;
else
{
/* pick a new cpu from the usable map */
- int new_cpu = (int)find_first_set_bit(cpumap) % smp_num_cpus;
+ int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
exec_domain_pause(ed);
if ( ed->processor != new_cpu )
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
- if ( (d->domain_id != IDLE_DOMAIN_ID) &&
+ if ( d->domain_id == IDLE_DOMAIN_ID )
+ set_bit(_DOMF_idle_domain, &d->domain_flags);
+
+ if ( !is_idle_task(d) &&
((init_event_channels(d) != 0) || (grant_table_create(d) != 0)) )
{
destroy_event_channels(d);
sched_add_domain(ed);
- if ( d->domain_id != IDLE_DOMAIN_ID )
+ if ( !is_idle_task(d) )
{
write_lock(&domlist_lock);
pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-static spinlock_t page_scrub_lock;
-struct list_head page_scrub_list;
+static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(page_scrub_list);
/*********************
* ALLOCATION BITMAP
static __init int page_scrub_init(void)
{
- spin_lock_init(&page_scrub_lock);
- INIT_LIST_HEAD(&page_scrub_list);
open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
return 0;
}
break;
case TYPE_CPU:
case TYPE_S_CPU:
- for ( j = sum = 0; j < smp_num_cpus; j++ )
+ sum = 0;
+ for_each_online_cpu ( j )
sum += atomic_read(&counters[j]);
printk("TOTAL[%10d] ", sum);
- for ( j = 0; j < smp_num_cpus; j++ )
+ for_each_online_cpu ( j )
printk("CPU%02d[%10d] ", j, atomic_read(&counters[j]));
counters += NR_CPUS;
break;
void perfc_reset(unsigned char key)
{
- int i, j, sum;
+ int i, j;
s_time_t now = NOW();
atomic_t *counters = (atomic_t *)&perfcounters;
counters += 1;
break;
case TYPE_CPU:
- for ( j = sum = 0; j < smp_num_cpus; j++ )
+ for ( j = 0; j < NR_CPUS; j++ )
atomic_set(&counters[j],0);
case TYPE_S_CPU:
counters += NR_CPUS;
break;
case TYPE_ARRAY:
- for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
+ for ( j = 0; j < NR_CPUS; j++ )
atomic_set(&counters[j],0);
case TYPE_S_ARRAY:
counters += perfc_info[i].nr_elements;
break;
case TYPE_CPU:
case TYPE_S_CPU:
- perfc_d[i].nr_vals = smp_num_cpus;
+ perfc_d[i].nr_vals = num_online_cpus();
break;
case TYPE_ARRAY:
case TYPE_S_ARRAY:
static int bvt_alloc_task(struct exec_domain *ed)
{
struct domain *d = ed->domain;
- if ( (d->sched_priv == NULL) ) {
+
+ if ( (d->sched_priv == NULL) )
+ {
if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
return -1;
memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
}
+
ed->sched_priv = &BVT_INFO(d)->ed_inf[ed->vcpu_id];
+
BVT_INFO(d)->ed_inf[ed->vcpu_id].inf = BVT_INFO(d);
BVT_INFO(d)->ed_inf[ed->vcpu_id].exec_domain = ed;
+
return 0;
}
ASSERT(inf != NULL);
ASSERT(d != NULL);
+ /* Allocate per-CPU context if this is the first domain to be added. */
+ if ( CPU_INFO(d->processor) == NULL )
+ {
+ schedule_data[d->processor].sched_priv = xmalloc(struct bvt_cpu_info);
+ BUG_ON(CPU_INFO(d->processor) == NULL);
+ INIT_LIST_HEAD(RUNQUEUE(d->processor));
+ CPU_SVT(d->processor) = 0;
+ }
+
if ( d->vcpu_id == 0 )
{
inf->mcu_advance = MCU_ADVANCE;
einf->exec_domain = d;
- if ( d->domain->domain_id == IDLE_DOMAIN_ID )
+ if ( is_idle_task(d->domain) )
{
einf->avt = einf->evt = ~0U;
+ BUG_ON(__task_on_runqueue(d));
+ __add_to_runqueue_head(d);
}
else
{
}
}
-static int bvt_init_idle_task(struct exec_domain *ed)
-{
- if ( bvt_alloc_task(ed) < 0 )
- return -1;
-
- bvt_add_task(ed);
-
- set_bit(_VCPUF_running, &ed->vcpu_flags);
- if ( !__task_on_runqueue(ed) )
- __add_to_runqueue_head(ed);
-
- return 0;
-}
-
static void bvt_wake(struct exec_domain *ed)
{
struct bvt_edom_info *einf = EBVT_INFO(ed);
}
}
-/* Initialise the data structures. */
-static int bvt_init_scheduler(void)
-{
- int i;
-
- for ( i = 0; i < NR_CPUS; i++ )
- {
- schedule_data[i].sched_priv = xmalloc(struct bvt_cpu_info);
-
- if ( schedule_data[i].sched_priv == NULL )
- {
- printk("Failed to allocate BVT scheduler per-CPU memory!\n");
- return -1;
- }
-
- INIT_LIST_HEAD(RUNQUEUE(i));
-
- CPU_SVT(i) = 0; /* XXX do I really need to do this? */
- }
-
- return 0;
-}
-
struct scheduler sched_bvt_def = {
.name = "Borrowed Virtual Time",
.opt_name = "bvt",
.sched_id = SCHED_BVT,
- .init_scheduler = bvt_init_scheduler,
- .init_idle_task = bvt_init_idle_task,
.alloc_task = bvt_alloc_task,
.add_task = bvt_add_task,
.free_task = bvt_free_task,
#include <xen/time.h>
#include <xen/slab.h>
-/*#include <xen/adv_sched_hist.h>*/
-
/*verbosity settings*/
#define SEDFLEVEL 0
#define PRINT(_f, _a...) \
-if ((_f)<=SEDFLEVEL) printk(_a );
+ if ((_f)<=SEDFLEVEL) printk(_a );
#ifndef NDEBUG
- #define SEDF_STATS
- #define CHECK(_p) if ( !(_p) ) \
- { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
- __FILE__);}
+#define SEDF_STATS
+#define CHECK(_p) if ( !(_p) ) \
+ { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
+ __FILE__);}
#else
- #define CHECK(_p) ((void)0)
+#define CHECK(_p) ((void)0)
#endif
/*various ways of unblocking domains*/
struct sedf_dom_info {
- struct domain *domain;
+ struct domain *domain;
};
struct sedf_edom_info
{
- struct exec_domain *exec_domain;
- struct list_head list;
- struct list_head extralist[2];
-
- /*Parameters for EDF*/
- s_time_t period; /*=(relative deadline)*/
- s_time_t slice; /*=worst case execution time*/
-
- /*Advaced Parameters*/
- /*Latency Scaling*/
- s_time_t period_orig;
- s_time_t slice_orig;
- s_time_t latency;
-
- /*status of domain*/
- int status;
- /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
- short weight;
- short extraweight;
- /*Bookkeeping*/
- s_time_t deadl_abs;
- s_time_t sched_start_abs;
- s_time_t cputime;
- /* times the domain un-/blocked */
- s_time_t block_abs;
- s_time_t unblock_abs;
-
- /*scores for {util, block penalty}-weighted extratime distribution*/
- int score[2];
- s_time_t short_block_lost_tot;
-
- /*Statistics*/
- s_time_t extra_time_tot;
+ struct exec_domain *exec_domain;
+ struct list_head list;
+ struct list_head extralist[2];
+
+ /*Parameters for EDF*/
+ s_time_t period; /*=(relative deadline)*/
+ s_time_t slice; /*=worst case execution time*/
+
+ /*Advanced Parameters*/
+ /*Latency Scaling*/
+ s_time_t period_orig;
+ s_time_t slice_orig;
+ s_time_t latency;
+
+ /*status of domain*/
+ int status;
+ /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
+ short weight;
+ short extraweight;
+ /*Bookkeeping*/
+ s_time_t deadl_abs;
+ s_time_t sched_start_abs;
+ s_time_t cputime;
+ /* times the domain un-/blocked */
+ s_time_t block_abs;
+ s_time_t unblock_abs;
+
+ /*scores for {util, block penalty}-weighted extratime distribution*/
+ int score[2];
+ s_time_t short_block_lost_tot;
+
+ /*Statistics*/
+ s_time_t extra_time_tot;
#ifdef SEDF_STATS
- s_time_t block_time_tot;
- s_time_t penalty_time_tot;
- int block_tot;
- int short_block_tot;
- int long_block_tot;
- int short_cont;
- int pen_extra_blocks;
- int pen_extra_slices;
+ s_time_t block_time_tot;
+ s_time_t penalty_time_tot;
+ int block_tot;
+ int short_block_tot;
+ int long_block_tot;
+ int short_cont;
+ int pen_extra_blocks;
+ int pen_extra_slices;
#endif
};
struct sedf_cpu_info {
- struct list_head runnableq;
- struct list_head waitq;
- struct list_head extraq[2];
+ struct list_head runnableq;
+ struct list_head waitq;
+ struct list_head extraq[2];
};
-#define EDOM_INFO(d) ((struct sedf_edom_info *)((d)->sched_priv))
-#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
-#define LIST(d) (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
-#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
-#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
-#define IDLETASK(cpu) ((struct exec_domain *)schedule_data[cpu].idle)
+#define EDOM_INFO(d) ((struct sedf_edom_info *)((d)->sched_priv))
+#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
+#define LIST(d) (&EDOM_INFO(d)->list)
+#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
+#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
+#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
+#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
+#define IDLETASK(cpu) ((struct exec_domain *)schedule_data[cpu].idle)
-#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
+#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
#define MIN(x,y) (((x)<(y))?(x):(y))
#define DIV_UP(x,y) (((x) + (y) - 1) / y)
static void sedf_dump_cpu_state(int i);
static inline int extraq_on(struct exec_domain *d, int i) {
- return ((EXTRALIST(d,i)->next != NULL) &&
- (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
+ return ((EXTRALIST(d,i)->next != NULL) &&
+ (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
}
static inline void extraq_add_head(struct exec_domain *d, int i)
static inline void extraq_del(struct exec_domain *d, int i)
{
- struct list_head *list = EXTRALIST(d,i);
- ASSERT(extraq_on(d,i));
- PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
- d->vcpu_id, i);
- list_del(list);
- list->next = NULL;
- ASSERT(!extraq_on(d, i));
+ struct list_head *list = EXTRALIST(d,i);
+ ASSERT(extraq_on(d,i));
+ PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
+ d->vcpu_id, i);
+ list_del(list);
+ list->next = NULL;
+ ASSERT(!extraq_on(d, i));
}
/* adds a domain to the queue of processes which are aware of extra time. List
charging each domain that recieved extratime with an inverse of its weight.
*/
static inline void extraq_add_sort_update(struct exec_domain *d, int i, int sub) {
- struct list_head *cur;
- struct sedf_edom_info *curinf;
-
- ASSERT(!extraq_on(d,i));
- PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
- " to L%i extraq\n",
- d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
- EDOM_INFO(d)->short_block_lost_tot, i);
- /*iterate through all elements to find our "hole" and on our way
- update all the other scores*/
- list_for_each(cur,EXTRAQ(d->processor,i)){
- curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
- curinf->score[i] -= sub;
- if (EDOM_INFO(d)->score[i] < curinf->score[i])
- break;
- else
- PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
- curinf->exec_domain->domain->domain_id,
- curinf->exec_domain->vcpu_id, curinf->score[i]);
- }
- /*cur now contains the element, before which we'll enqueue*/
- PRINT(3, "\tlist_add to %p\n", cur->prev);
- list_add(EXTRALIST(d,i),cur->prev);
-
- /*continue updating the extraq*/
- if ((cur != EXTRAQ(d->processor,i)) && sub)
- for (cur = cur->next; cur != EXTRAQ(d->processor,i);
- cur = cur-> next) {
- curinf = list_entry(cur,struct sedf_edom_info,
- extralist[i]);
- curinf->score[i] -= sub;
- PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
- curinf->exec_domain->domain->domain_id,
- curinf->exec_domain->vcpu_id, curinf->score[i]);
- }
- ASSERT(extraq_on(d,i));
+ struct list_head *cur;
+ struct sedf_edom_info *curinf;
+
+ ASSERT(!extraq_on(d,i));
+ PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
+ " to L%i extraq\n",
+ d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
+ EDOM_INFO(d)->short_block_lost_tot, i);
+ /*iterate through all elements to find our "hole" and on our way
+ update all the other scores*/
+ list_for_each(cur,EXTRAQ(d->processor,i)){
+ curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
+ curinf->score[i] -= sub;
+ if (EDOM_INFO(d)->score[i] < curinf->score[i])
+ break;
+ else
+ PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
+ curinf->exec_domain->domain->domain_id,
+ curinf->exec_domain->vcpu_id, curinf->score[i]);
+ }
+ /*cur now contains the element, before which we'll enqueue*/
+ PRINT(3, "\tlist_add to %p\n", cur->prev);
+ list_add(EXTRALIST(d,i),cur->prev);
+
+ /*continue updating the extraq*/
+ if ((cur != EXTRAQ(d->processor,i)) && sub)
+ for (cur = cur->next; cur != EXTRAQ(d->processor,i);
+ cur = cur-> next) {
+ curinf = list_entry(cur,struct sedf_edom_info,
+ extralist[i]);
+ curinf->score[i] -= sub;
+ PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
+ curinf->exec_domain->domain->domain_id,
+ curinf->exec_domain->vcpu_id, curinf->score[i]);
+ }
+ ASSERT(extraq_on(d,i));
}
static inline void extraq_check(struct exec_domain *d) {
- if (extraq_on(d, EXTRA_UTIL_Q)) {
- PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
- if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
- !extra_runs(EDOM_INFO(d))) {
- extraq_del(d, EXTRA_UTIL_Q);
- PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
- d->domain->domain_id, d->vcpu_id);
- }
- } else {
- PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
- d->vcpu_id);
- if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
- {
- #if (EXTRA == EXTRA_ROUNDR)
- extraq_add_tail(d, EXTRA_UTIL_Q);
- #elif (EXTRA == EXTRA_SLICE_WEIGHT || \
- EXTRA == EXTRA_BLOCK_WEIGHT)
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
- #elif
- ;
- #endif
- PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
- d->vcpu_id);
- }
- }
+ if (extraq_on(d, EXTRA_UTIL_Q)) {
+ PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
+ if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
+ !extra_runs(EDOM_INFO(d))) {
+ extraq_del(d, EXTRA_UTIL_Q);
+ PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
+ d->domain->domain_id, d->vcpu_id);
+ }
+ } else {
+ PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
+ d->vcpu_id);
+ if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
+ {
+#if (EXTRA == EXTRA_ROUNDR)
+ extraq_add_tail(d, EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
+ EXTRA == EXTRA_BLOCK_WEIGHT)
+ extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#elif
+ ;
+#endif
+ PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
+ d->vcpu_id);
+ }
+ }
}
static inline void extraq_check_add_unblocked(struct exec_domain *d,
- int priority) {
- struct sedf_edom_info *inf = EDOM_INFO(d);
- if (inf->status & EXTRA_AWARE)
- #if (EXTRA == EXTRA_ROUNDR)
- if (priority)
- extraq_add_head(d,EXTRA_UTIL_Q);
- else
- extraq_add_tail(d,EXTRA_UTIL_Q);
- #elif (EXTRA == EXTRA_SLICE_WEIGHT \
- || EXTRA == EXTRA_BLOCK_WEIGHT)
- /*put in on the weighted extraq,
- without updating any scores*/
- extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
- #else
- ;
- #endif
+ int priority) {
+ struct sedf_edom_info *inf = EDOM_INFO(d);
+ if (inf->status & EXTRA_AWARE)
+#if (EXTRA == EXTRA_ROUNDR)
+ if (priority)
+ extraq_add_head(d,EXTRA_UTIL_Q);
+ else
+ extraq_add_tail(d,EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT \
+ || EXTRA == EXTRA_BLOCK_WEIGHT)
+ /*put it on the weighted extraq,
+ without updating any scores*/
+ extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#else
+ ;
+#endif
}
static inline int __task_on_queue(struct exec_domain *d) {
- return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
+ return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
}
static inline void __del_from_queue(struct exec_domain *d)
{
typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
static inline void list_insert_sort(struct list_head *list,
- struct list_head *element, list_comparer comp) {
- struct list_head *cur;
- /*iterate through all elements to find our "hole"*/
- list_for_each(cur,list){
- if (comp(element, cur) < 0)
- break;
- }
- /*cur now contains the element, before which we'll enqueue*/
- PRINT(3,"\tlist_add to %p\n",cur->prev);
- list_add(element, cur->prev);
+ struct list_head *element, list_comparer comp) {
+ struct list_head *cur;
+ /*iterate through all elements to find our "hole"*/
+ list_for_each(cur,list){
+ if (comp(element, cur) < 0)
+ break;
+ }
+ /*cur now contains the element, before which we'll enqueue*/
+ PRINT(3,"\tlist_add to %p\n",cur->prev);
+ list_add(element, cur->prev);
}
#define DOMAIN_COMPARER(name, field, comp1, comp2) \
int name##_comp(struct list_head* el1, struct list_head* el2) \
{ \
- struct sedf_edom_info *d1, *d2; \
- d1 = list_entry(el1,struct sedf_edom_info, field); \
- d2 = list_entry(el2,struct sedf_edom_info, field); \
- if ((comp1) == (comp2)) \
- return 0; \
- if ((comp1) < (comp2)) \
- return -1; \
- else \
- return 1; \
+ struct sedf_edom_info *d1, *d2; \
+ d1 = list_entry(el1,struct sedf_edom_info, field); \
+ d2 = list_entry(el2,struct sedf_edom_info, field); \
+ if ((comp1) == (comp2)) \
+ return 0; \
+ if ((comp1) < (comp2)) \
+ return -1; \
+ else \
+ return 1; \
}
/* adds a domain to the queue of processes which wait for the beginning of the
next period; this list is therefore sortet by this time, which is simply
absol. deadline - period
*/
DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2))
-static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
- ASSERT(!__task_on_queue(d));
- PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
- d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
- list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
- ASSERT(__task_on_queue(d));
+ static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
+ ASSERT(!__task_on_queue(d));
+ PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
+ d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
+ list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
+ ASSERT(__task_on_queue(d));
}
/* adds a domain to the queue of processes which have started their current
task will run. As we are implementing EDF, this list is sorted by deadlines.
*/
DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
-static inline void __add_to_runqueue_sort(struct exec_domain *d) {
- PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
- d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
- list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
-}
-
-/* Initialises the queues */
-static int sedf_init_scheduler() {
- int i;
- PRINT(2,"sedf_init_scheduler was called\n");
-
- for ( i = 0; i < NR_CPUS; i++ ) {
- schedule_data[i].sched_priv =
- xmalloc(struct sedf_cpu_info);
- if ( schedule_data[i].sched_priv == NULL )
- return -1;
- INIT_LIST_HEAD(WAITQ(i));
- INIT_LIST_HEAD(RUNQ(i));
- INIT_LIST_HEAD(EXTRAQ(i,EXTRA_PEN_Q));
- INIT_LIST_HEAD(EXTRAQ(i,EXTRA_UTIL_Q));
- }
- return 0;
+ static inline void __add_to_runqueue_sort(struct exec_domain *d) {
+ PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
+ d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
+ list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
}
/* Allocates memory for per domain private scheduling data*/
static int sedf_alloc_task(struct exec_domain *d) {
- PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
- d->vcpu_id);
- if (d->domain->sched_priv == NULL) {
- if ((d->domain->sched_priv =
- xmalloc(struct sedf_dom_info)) == NULL )
- return -1;
- memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
- }
- if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
- return -1;
- memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
- return 0;
+ PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
+ d->vcpu_id);
+ if (d->domain->sched_priv == NULL) {
+ if ((d->domain->sched_priv =
+ xmalloc(struct sedf_dom_info)) == NULL )
+ return -1;
+ memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
+ }
+ if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
+ return -1;
+ memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
+ return 0;
}
/* Setup the sedf_dom_info */
static void sedf_add_task(struct exec_domain *d)
{
- struct sedf_edom_info *inf = EDOM_INFO(d);
- inf->exec_domain = d;
-
- PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
- d->vcpu_id);
-
- if (d->domain->domain_id==0) {
- /*set dom0 to something useful to boot the machine*/
- inf->period = MILLISECS(20);
- inf->slice = MILLISECS(15);
- inf->latency = 0;
- inf->deadl_abs = 0;
- inf->status = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
- }
- else {
- /*other domains run in best effort mode*/
- inf->period = WEIGHT_PERIOD;
- inf->slice = 0;
- inf->deadl_abs = 0;
- inf->latency = 0;
- inf->status = EXTRA_AWARE | SEDF_ASLEEP;
- inf->extraweight = 1;
- }
- inf->period_orig = inf->period; inf->slice_orig = inf->slice;
- INIT_LIST_HEAD(&(inf->list));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
- INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
-
- if (d->domain->domain_id != IDLE_DOMAIN_ID) {
- extraq_check(d);
- }
+ struct sedf_edom_info *inf = EDOM_INFO(d);
+ inf->exec_domain = d;
+
+ PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
+ d->vcpu_id);
+
+ /* Allocate per-CPU context if this is the first domain to be added. */
+ if ( schedule_data[d->processor].sched_priv == NULL )
+ {
+ schedule_data[d->processor].sched_priv =
+ xmalloc(struct sedf_cpu_info);
+ BUG_ON(schedule_data[d->processor].sched_priv == NULL);
+ INIT_LIST_HEAD(WAITQ(d->processor));
+ INIT_LIST_HEAD(RUNQ(d->processor));
+ INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q));
+ INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
+ }
+
+ if (d->domain->domain_id==0) {
+ /*set dom0 to something useful to boot the machine*/
+ inf->period = MILLISECS(20);
+ inf->slice = MILLISECS(15);
+ inf->latency = 0;
+ inf->deadl_abs = 0;
+ inf->status = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
+ } else {
+ /*other domains run in best effort mode*/
+ inf->period = WEIGHT_PERIOD;
+ inf->slice = 0;
+ inf->deadl_abs = 0;
+ inf->latency = 0;
+ inf->status = EXTRA_AWARE | SEDF_ASLEEP;
+ inf->extraweight = 1;
+ }
+ inf->period_orig = inf->period; inf->slice_orig = inf->slice;
+ INIT_LIST_HEAD(&(inf->list));
+ INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
+ INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
+
+ if (!is_idle_task(d->domain)) {
+ extraq_check(d);
+ } else {
+ EDOM_INFO(d)->deadl_abs = 0;
+ EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
+ }
}
/* Frees memory used by domain info */
static void sedf_free_task(struct domain *d)
{
- int i;
- PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
- ASSERT(d->sched_priv != NULL);
- xfree(d->sched_priv);
-
- for (i = 0; i < MAX_VIRT_CPUS; i++)
- if ( d->exec_domain[i] ) {
- ASSERT(d->exec_domain[i]->sched_priv != NULL);
- xfree(d->exec_domain[i]->sched_priv);
- }
-}
-
-/* Initialises idle task */
-static int sedf_init_idle_task(struct exec_domain *d) {
- PRINT(2,"sedf_init_idle_task was called, domain-id %i.%i\n",
- d->domain->domain_id, d->vcpu_id);
- if ( sedf_alloc_task(d) < 0 )
- return -1;
-
- sedf_add_task(d);
- EDOM_INFO(d)->deadl_abs = 0;
- EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
- set_bit(_VCPUF_running, &d->vcpu_flags);
- /*the idle task doesn't have to turn up on any list...*/
- return 0;
+ int i;
+ PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+ ASSERT(d->sched_priv != NULL);
+ xfree(d->sched_priv);
+
+ for (i = 0; i < MAX_VIRT_CPUS; i++)
+ if ( d->exec_domain[i] ) {
+ ASSERT(d->exec_domain[i]->sched_priv != NULL);
+ xfree(d->exec_domain[i]->sched_priv);
+ }
}
/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
static inline void desched_edf_dom (s_time_t now, struct exec_domain* d) {
- struct sedf_edom_info* inf = EDOM_INFO(d);
- /*current domain is running in real time mode*/
-
- ASSERT(__task_on_queue(d));
- /*update the domains cputime*/
- inf->cputime += now - inf->sched_start_abs;
+ struct sedf_edom_info* inf = EDOM_INFO(d);
+ /*current domain is running in real time mode*/
+
+ ASSERT(__task_on_queue(d));
+ /*update the domains cputime*/
+ inf->cputime += now - inf->sched_start_abs;
- /*scheduling decisions, which don't remove the running domain
- from the runq*/
- if ((inf->cputime < inf->slice) && sedf_runnable(d))
- return;
-
- __del_from_queue(d);
-
- /*manage bookkeeping (i.e. calculate next deadline,
- memorize overun-time of slice) of finished domains*/
- if (inf->cputime >= inf->slice) {
- inf->cputime -= inf->slice;
-
- if (inf->period < inf->period_orig) {
- /*this domain runs in latency scaling or burst mode*/
- #if (UNBLOCK == UNBLOCK_BURST)
- /*if we are runnig in burst scaling wait for two periods
- before scaling periods up again*/
- if (now - inf->unblock_abs >= 2 * inf->period)
- #endif
- {
- inf->period *= 2; inf->slice *= 2;
- if ((inf->period > inf->period_orig) ||
- (inf->slice > inf->slice_orig)) {
- /*reset slice & period*/
- inf->period = inf->period_orig;
- inf->slice = inf->slice_orig;
- }
- }
- }
- /*set next deadline*/
- inf->deadl_abs += inf->period;
- }
-
- /*add a runnable domain to the waitqueue*/
- if (sedf_runnable(d))
- __add_to_waitqueue_sort(d);
- else {
- /*we have a blocked realtime task -> remove it from exqs too*/
- #if (EXTRA > EXTRA_OFF)
- #if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
- #endif
- if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
- #endif
- }
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
+ /*scheduling decisions, which don't remove the running domain
+ from the runq*/
+ if ((inf->cputime < inf->slice) && sedf_runnable(d))
+ return;
+
+ __del_from_queue(d);
+
+ /*manage bookkeeping (i.e. calculate next deadline,
+ memorize overrun-time of slice) of finished domains*/
+ if (inf->cputime >= inf->slice) {
+ inf->cputime -= inf->slice;
+
+ if (inf->period < inf->period_orig) {
+ /*this domain runs in latency scaling or burst mode*/
+#if (UNBLOCK == UNBLOCK_BURST)
+ /*if we are running in burst scaling wait for two periods
+ before scaling periods up again*/
+ if (now - inf->unblock_abs >= 2 * inf->period)
+#endif
+ {
+ inf->period *= 2; inf->slice *= 2;
+ if ((inf->period > inf->period_orig) ||
+ (inf->slice > inf->slice_orig)) {
+ /*reset slice & period*/
+ inf->period = inf->period_orig;
+ inf->slice = inf->slice_orig;
+ }
+ }
+ }
+ /*set next deadline*/
+ inf->deadl_abs += inf->period;
+ }
+
+ /*add a runnable domain to the waitqueue*/
+ if (sedf_runnable(d))
+ __add_to_waitqueue_sort(d);
+ else {
+ /*we have a blocked realtime task -> remove it from exqs too*/
+#if (EXTRA > EXTRA_OFF)
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+#endif
+ if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
+#endif
+ }
+ ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+ ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
+ sedf_runnable(d)));
}
/* Update all elements on the queues */
static inline void update_queues(s_time_t now, struct list_head* runq,
-struct list_head* waitq) {
- struct list_head *cur,*tmp;
- struct sedf_edom_info *curinf;
-
- PRINT(3,"Updating waitq..\n");
- /*check for the first elements of the waitqueue, whether their
- next period has already started*/
- list_for_each_safe(cur, tmp, waitq) {
- curinf = list_entry(cur, struct sedf_edom_info, list);
- PRINT(4,"\tLooking @ dom %i.%i\n",
- curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
- if (PERIOD_BEGIN(curinf) <= now) {
- __del_from_queue(curinf->exec_domain);
- __add_to_runqueue_sort(curinf->exec_domain);
- }
- else
- break;
- }
-
- PRINT(3,"Updating runq..\n");
- /*process the runq, find domains that are on
- the runqueue which shouldn't be there*/
- list_for_each_safe(cur, tmp, runq) {
- curinf = list_entry(cur,struct sedf_edom_info,list);
- PRINT(4,"\tLooking @ dom %i.%i\n",
- curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
- if (unlikely(curinf->slice == 0)) {
- /*ignore domains with empty slice*/
- PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
- curinf->exec_domain->domain->domain_id,
- curinf->exec_domain->vcpu_id);
- __del_from_queue(curinf->exec_domain);
-
- /*move them to their next period*/
- curinf->deadl_abs += curinf->period;
- /*and put them back into the queue*/
- __add_to_waitqueue_sort(curinf->exec_domain);
- continue;
- }
- if (unlikely((curinf->deadl_abs < now) ||
- (curinf->cputime > curinf->slice))) {
- /*we missed the deadline or the slice was
- already finished... might hapen because
- of dom_adj.*/
- PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
- "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
- " cputime: %"PRIu64"\n",
- curinf->exec_domain->domain->domain_id,
- curinf->exec_domain->vcpu_id,
- curinf->deadl_abs, curinf->slice, now,
- curinf->cputime);
- __del_from_queue(curinf->exec_domain);
- /*common case: we miss one period!*/
- curinf->deadl_abs += curinf->period;
-
- /*if we are still behind: modulo arithmetic,
- force deadline to be in future and
- aligned to period borders!*/
- if (unlikely(curinf->deadl_abs < now))
- curinf->deadl_abs +=
- DIV_UP(now - curinf->deadl_abs,
- curinf->period) * curinf->period;
- ASSERT(curinf->deadl_abs > now);
- /*give a fresh slice*/
- curinf->cputime = 0;
- if (PERIOD_BEGIN(curinf) > now)
- __add_to_waitqueue_sort(curinf->exec_domain);
- else
- __add_to_runqueue_sort(curinf->exec_domain);
- }
- else
- break;
- }
- PRINT(3,"done updating the queues\n");
+ struct list_head* waitq) {
+ struct list_head *cur,*tmp;
+ struct sedf_edom_info *curinf;
+
+ PRINT(3,"Updating waitq..\n");
+ /*check for the first elements of the waitqueue, whether their
+ next period has already started*/
+ list_for_each_safe(cur, tmp, waitq) {
+ curinf = list_entry(cur, struct sedf_edom_info, list);
+ PRINT(4,"\tLooking @ dom %i.%i\n",
+ curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
+ if (PERIOD_BEGIN(curinf) <= now) {
+ __del_from_queue(curinf->exec_domain);
+ __add_to_runqueue_sort(curinf->exec_domain);
+ }
+ else
+ break;
+ }
+
+ PRINT(3,"Updating runq..\n");
+ /*process the runq, find domains that are on
+ the runqueue which shouldn't be there*/
+ list_for_each_safe(cur, tmp, runq) {
+ curinf = list_entry(cur,struct sedf_edom_info,list);
+ PRINT(4,"\tLooking @ dom %i.%i\n",
+ curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
+ if (unlikely(curinf->slice == 0)) {
+ /*ignore domains with empty slice*/
+ PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
+ curinf->exec_domain->domain->domain_id,
+ curinf->exec_domain->vcpu_id);
+ __del_from_queue(curinf->exec_domain);
+
+ /*move them to their next period*/
+ curinf->deadl_abs += curinf->period;
+ /*and put them back into the queue*/
+ __add_to_waitqueue_sort(curinf->exec_domain);
+ continue;
+ }
+ if (unlikely((curinf->deadl_abs < now) ||
+ (curinf->cputime > curinf->slice))) {
+ /*we missed the deadline or the slice was
+ already finished... might hapen because
+ of dom_adj.*/
+ PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
+ "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
+ " cputime: %"PRIu64"\n",
+ curinf->exec_domain->domain->domain_id,
+ curinf->exec_domain->vcpu_id,
+ curinf->deadl_abs, curinf->slice, now,
+ curinf->cputime);
+ __del_from_queue(curinf->exec_domain);
+ /*common case: we miss one period!*/
+ curinf->deadl_abs += curinf->period;
+
+ /*if we are still behind: modulo arithmetic,
+ force deadline to be in future and
+ aligned to period borders!*/
+ if (unlikely(curinf->deadl_abs < now))
+ curinf->deadl_abs +=
+ DIV_UP(now - curinf->deadl_abs,
+ curinf->period) * curinf->period;
+ ASSERT(curinf->deadl_abs > now);
+ /*give a fresh slice*/
+ curinf->cputime = 0;
+ if (PERIOD_BEGIN(curinf) > now)
+ __add_to_waitqueue_sort(curinf->exec_domain);
+ else
+ __add_to_runqueue_sort(curinf->exec_domain);
+ }
+ else
+ break;
+ }
+ PRINT(3,"done updating the queues\n");
}
#if (EXTRA > EXTRA_OFF)
if the domain is blocked / has regained its short-block-loss
time it is not put on any queue */
static inline void desched_extra_dom(s_time_t now, struct exec_domain* d) {
- struct sedf_edom_info *inf = EDOM_INFO(d);
- int i = extra_get_cur_q(inf);
-
+ struct sedf_edom_info *inf = EDOM_INFO(d);
+ int i = extra_get_cur_q(inf);
+
#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
- unsigned long oldscore;
+ unsigned long oldscore;
#endif
- ASSERT(extraq_on(d, i));
- /*unset all running flags*/
- inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
- /*fresh slice for the next run*/
- inf->cputime = 0;
- /*accumulate total extratime*/
- inf->extra_time_tot += now - inf->sched_start_abs;
- /*remove extradomain from head of the queue*/
- extraq_del(d, i);
+ ASSERT(extraq_on(d, i));
+ /*unset all running flags*/
+ inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
+ /*fresh slice for the next run*/
+ inf->cputime = 0;
+ /*accumulate total extratime*/
+ inf->extra_time_tot += now - inf->sched_start_abs;
+ /*remove extradomain from head of the queue*/
+ extraq_del(d, i);
#if (EXTRA == EXTRA_ROUNDR)
- if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
- /*add to the tail if it is runnable => round-robin*/
- extraq_add_tail(d, EXTRA_UTIL_Q);
+ if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+ /*add to the tail if it is runnable => round-robin*/
+ extraq_add_tail(d, EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
- /*update the score*/
- oldscore = inf->score[i];
+ /*update the score*/
+ oldscore = inf->score[i];
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (i == EXTRA_PEN_Q) {
- /*domain was running in L0 extraq*/
- /*reduce block lost, probably more sophistication here!*/
- /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
- inf->short_block_lost_tot -= now - inf->sched_start_abs;
- PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
- inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
- inf->short_block_lost_tot);
- if (inf->short_block_lost_tot <= 0) {
- PRINT(4,"Domain %i.%i compensated short block loss!\n",
- inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
- /*we have (over-)compensated our block penalty*/
- inf->short_block_lost_tot = 0;
- /*we don't want a place on the penalty queue anymore!*/
- inf->status &= ~EXTRA_WANT_PEN_Q;
- goto check_extra_queues;
- }
- /*we have to go again for another try in the block-extraq,
- the score is not used incremantally here, as this is
- already done by recalculating the block_lost*/
- inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
- inf->short_block_lost_tot;
- oldscore = 0;
- } else
+ if (i == EXTRA_PEN_Q) {
+ /*domain was running in L0 extraq*/
+ /*reduce block lost, probably more sophistication here!*/
+ /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
+ inf->short_block_lost_tot -= now - inf->sched_start_abs;
+ PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
+ inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
+ inf->short_block_lost_tot);
+ if (inf->short_block_lost_tot <= 0) {
+ PRINT(4,"Domain %i.%i compensated short block loss!\n",
+ inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
+ /*we have (over-)compensated our block penalty*/
+ inf->short_block_lost_tot = 0;
+ /*we don't want a place on the penalty queue anymore!*/
+ inf->status &= ~EXTRA_WANT_PEN_Q;
+ goto check_extra_queues;
+ }
+ /*we have to go again for another try in the block-extraq,
+                  the score is not used incrementally here, as this is
+ already done by recalculating the block_lost*/
+ inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
+ inf->short_block_lost_tot;
+ oldscore = 0;
+ } else
#endif
- {
- /*domain was running in L1 extraq => score is inverse of
- utilization and is used somewhat incremental!*/
- if (!inf->extraweight)
- /*NB: use fixed point arithmetic with 10 bits*/
- inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
- inf->slice;
- else
- /*give a domain w/ exweight = 1 as much as a domain with
- util = 1/128*/
- inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
- }
-check_extra_queues:
- /* Adding a runnable domain to the right queue and removing blocked ones*/
- if (sedf_runnable(d)) {
- /*add according to score: weighted round robin*/
- if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
- extraq_add_sort_update(d, i, oldscore);
- }
- else {
- /*remove this blocked domain from the waitq!*/
- __del_from_queue(d);
+ {
+ /*domain was running in L1 extraq => score is inverse of
+ utilization and is used somewhat incremental!*/
+ if (!inf->extraweight)
+ /*NB: use fixed point arithmetic with 10 bits*/
+ inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
+ inf->slice;
+ else
+ /*give a domain w/ exweight = 1 as much as a domain with
+ util = 1/128*/
+ inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
+ }
+ check_extra_queues:
+ /* Adding a runnable domain to the right queue and removing blocked ones*/
+ if (sedf_runnable(d)) {
+ /*add according to score: weighted round robin*/
+ if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
+ extraq_add_sort_update(d, i, oldscore);
+ }
+ else {
+ /*remove this blocked domain from the waitq!*/
+ __del_from_queue(d);
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- /*make sure that we remove a blocked domain from the other
- extraq too*/
- if (i == EXTRA_PEN_Q) {
- if (extraq_on(d, EXTRA_UTIL_Q))
- extraq_del(d, EXTRA_UTIL_Q);
- }
- else {
- if (extraq_on(d, EXTRA_PEN_Q))
- extraq_del(d, EXTRA_PEN_Q);
- }
+ /*make sure that we remove a blocked domain from the other
+ extraq too*/
+ if (i == EXTRA_PEN_Q) {
+ if (extraq_on(d, EXTRA_UTIL_Q))
+ extraq_del(d, EXTRA_UTIL_Q);
+ }
+ else {
+ if (extraq_on(d, EXTRA_PEN_Q))
+ extraq_del(d, EXTRA_PEN_Q);
+ }
#endif
- }
+ }
#endif
- ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
- ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
- sedf_runnable(d)));
+ ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+ ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
+ sedf_runnable(d)));
}
#endif
static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
- s_time_t end_xt, struct list_head *extraq[], int cpu) {
- struct task_slice ret;
- struct sedf_edom_info *runinf;
-
- /* Enough time left to use for extratime? */
- if (end_xt - now < EXTRA_QUANTUM)
- goto return_idle;
+ s_time_t end_xt, struct list_head *extraq[], int cpu) {
+ struct task_slice ret;
+ struct sedf_edom_info *runinf;
+
+ /* Enough time left to use for extratime? */
+ if (end_xt - now < EXTRA_QUANTUM)
+ goto return_idle;
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (!list_empty(extraq[EXTRA_PEN_Q])) {
- /*we still have elements on the level 0 extraq
- => let those run first!*/
- runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
- struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
- runinf->status |= EXTRA_RUN_PEN;
- ret.task = runinf->exec_domain;
- ret.time = EXTRA_QUANTUM;
+ if (!list_empty(extraq[EXTRA_PEN_Q])) {
+ /*we still have elements on the level 0 extraq
+ => let those run first!*/
+ runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
+ struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
+ runinf->status |= EXTRA_RUN_PEN;
+ ret.task = runinf->exec_domain;
+ ret.time = EXTRA_QUANTUM;
#ifdef SEDF_STATS
- runinf->pen_extra_slices++;
+ runinf->pen_extra_slices++;
#endif
- } else
+ } else
#endif
- if (!list_empty(extraq[EXTRA_UTIL_Q])) {
- /*use elements from the normal extraqueue*/
- runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
- struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
- runinf->status |= EXTRA_RUN_UTIL;
- ret.task = runinf->exec_domain;
- ret.time = EXTRA_QUANTUM;
- }
- else
- goto return_idle;
+ if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+ /*use elements from the normal extraqueue*/
+ runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
+ struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
+ runinf->status |= EXTRA_RUN_UTIL;
+ ret.task = runinf->exec_domain;
+ ret.time = EXTRA_QUANTUM;
+ }
+ else
+ goto return_idle;
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
-
-return_idle:
- ret.task = IDLETASK(cpu);
- ret.time = end_xt - now;
- ASSERT(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
+ ASSERT(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ return ret;
+
+ return_idle:
+ ret.task = IDLETASK(cpu);
+ ret.time = end_xt - now;
+ ASSERT(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ return ret;
}
/* Main scheduling function
Reasons for calling this function are:
-and various others ;) in general: determine which domain to run next*/
static struct task_slice sedf_do_schedule(s_time_t now)
{
- int cpu = current->processor;
- struct list_head *runq = RUNQ(cpu);
- struct list_head *waitq = WAITQ(cpu);
- #if (EXTRA > EXTRA_OFF)
- struct sedf_edom_info *inf = EDOM_INFO(current);
- struct list_head *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
- EXTRAQ(cpu, EXTRA_UTIL_Q)};
- #endif
- struct task_slice ret;
- /*int i = 0;*/
- /*idle tasks don't need any of the following stuf*/
- if (is_idle_task(current->domain))
- goto check_waitq;
-
- /* create local state of the status of the domain, in order to avoid
- inconsistent state during scheduling decisions, because data for
- domain_runnable is not protected by the scheduling lock!*/
- if(!domain_runnable(current))
- inf->status |= SEDF_ASLEEP;
-
- if (inf->status & SEDF_ASLEEP)
- inf->block_abs = now;
+ int cpu = current->processor;
+ struct list_head *runq = RUNQ(cpu);
+ struct list_head *waitq = WAITQ(cpu);
+#if (EXTRA > EXTRA_OFF)
+ struct sedf_edom_info *inf = EDOM_INFO(current);
+ struct list_head *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
+ EXTRAQ(cpu, EXTRA_UTIL_Q)};
+#endif
+ struct task_slice ret;
+ /*int i = 0;*/
+        /*idle tasks don't need any of the following stuff*/
+ if (is_idle_task(current->domain))
+ goto check_waitq;
+
+ /* create local state of the status of the domain, in order to avoid
+ inconsistent state during scheduling decisions, because data for
+ domain_runnable is not protected by the scheduling lock!*/
+ if(!domain_runnable(current))
+ inf->status |= SEDF_ASLEEP;
+
+ if (inf->status & SEDF_ASLEEP)
+ inf->block_abs = now;
- #if (EXTRA > EXTRA_OFF)
- if (unlikely(extra_runs(inf))) {
- /*special treatment of domains running in extra time*/
- desched_extra_dom(now, current);
- }
- else
- #endif
- {
- desched_edf_dom(now, current);
- }
-check_waitq:
- update_queues(now, runq, waitq);
-
- /*now simply pick the first domain from the runqueue, which has the
- earliest deadline, because the list is sorted*/
- struct sedf_edom_info *runinf, *waitinf;
-
- if (!list_empty(runq)) {
- runinf = list_entry(runq->next,struct sedf_edom_info,list);
- ret.task = runinf->exec_domain;
- if (!list_empty(waitq)) {
- waitinf = list_entry(waitq->next,
- struct sedf_edom_info,list);
- /*rerun scheduler, when scheduled domain reaches it's
- end of slice or the first domain from the waitqueue
- gets ready*/
- ret.time = MIN(now + runinf->slice - runinf->cputime,
- PERIOD_BEGIN(waitinf)) - now;
- }
- else {
- ret.time = runinf->slice - runinf->cputime;
- }
- CHECK(ret.time > 0);
- goto sched_done;
- }
-
- if (!list_empty(waitq)) {
- waitinf = list_entry(waitq->next,struct sedf_edom_info, list);
- /*we could not find any suitable domain
- => look for domains that are aware of extratime*/
- #if (EXTRA > EXTRA_OFF)
- ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
- extraq, cpu);
- #else
- ret.task = IDLETASK(cpu);
- ret.time = PERIOD_BEGIN(waitinf) - now;
- #endif
- CHECK(ret.time > 0);
- }
- else {
- /*this could probably never happen, but one never knows...*/
- /*it can... imagine a second CPU, which is pure scifi ATM,
- but one never knows ;)*/
- ret.task = IDLETASK(cpu);
- ret.time = SECONDS(1);
- }
+#if (EXTRA > EXTRA_OFF)
+ if (unlikely(extra_runs(inf))) {
+ /*special treatment of domains running in extra time*/
+ desched_extra_dom(now, current);
+ }
+ else
+#endif
+ {
+ desched_edf_dom(now, current);
+ }
+ check_waitq:
+ update_queues(now, runq, waitq);
+
+ /*now simply pick the first domain from the runqueue, which has the
+ earliest deadline, because the list is sorted*/
+ struct sedf_edom_info *runinf, *waitinf;
+
+ if (!list_empty(runq)) {
+ runinf = list_entry(runq->next,struct sedf_edom_info,list);
+ ret.task = runinf->exec_domain;
+ if (!list_empty(waitq)) {
+ waitinf = list_entry(waitq->next,
+ struct sedf_edom_info,list);
+                        /*rerun scheduler, when scheduled domain reaches its
+ end of slice or the first domain from the waitqueue
+ gets ready*/
+ ret.time = MIN(now + runinf->slice - runinf->cputime,
+ PERIOD_BEGIN(waitinf)) - now;
+ }
+ else {
+ ret.time = runinf->slice - runinf->cputime;
+ }
+ CHECK(ret.time > 0);
+ goto sched_done;
+ }
+
+ if (!list_empty(waitq)) {
+ waitinf = list_entry(waitq->next,struct sedf_edom_info, list);
+ /*we could not find any suitable domain
+ => look for domains that are aware of extratime*/
+#if (EXTRA > EXTRA_OFF)
+ ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
+ extraq, cpu);
+#else
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
+#endif
+ CHECK(ret.time > 0);
+ }
+ else {
+ /*this could probably never happen, but one never knows...*/
+ /*it can... imagine a second CPU, which is pure scifi ATM,
+ but one never knows ;)*/
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ }
-sched_done:
- /*TODO: Do something USEFUL when this happens and find out, why it
- still can happen!!!*/
- if (ret.time<0) {
- printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
- ret.time);
- ret.time = EXTRA_QUANTUM;
- }
- EDOM_INFO(ret.task)->sched_start_abs = now;
- CHECK(ret.time > 0);
- ASSERT(sedf_runnable(ret.task));
- return ret;
+ sched_done:
+ /*TODO: Do something USEFUL when this happens and find out, why it
+ still can happen!!!*/
+ if (ret.time<0) {
+ printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
+ ret.time);
+ ret.time = EXTRA_QUANTUM;
+ }
+ EDOM_INFO(ret.task)->sched_start_abs = now;
+ CHECK(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ return ret;
}
static void sedf_sleep(struct exec_domain *d) {
- PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
-
- if (is_idle_task(d->domain))
- return;
+ PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+
+ if (is_idle_task(d->domain))
+ return;
- EDOM_INFO(d)->status |= SEDF_ASLEEP;
-
- if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(d->processor);
+ EDOM_INFO(d)->status |= SEDF_ASLEEP;
+
+ if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+ cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+ }
+ else {
+ if ( __task_on_queue(d) )
+ __del_from_queue(d);
+#if (EXTRA > EXTRA_OFF)
+ if (extraq_on(d, EXTRA_UTIL_Q))
+ extraq_del(d, EXTRA_UTIL_Q);
+#endif
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (extraq_on(d, EXTRA_PEN_Q))
+ extraq_del(d, EXTRA_PEN_Q);
#endif
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
- }
- else {
- if ( __task_on_queue(d) )
- __del_from_queue(d);
- #if (EXTRA > EXTRA_OFF)
- if (extraq_on(d, EXTRA_UTIL_Q))
- extraq_del(d, EXTRA_UTIL_Q);
- #endif
- #if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (extraq_on(d, EXTRA_PEN_Q))
- extraq_del(d, EXTRA_PEN_Q);
- #endif
- }
+ }
}
/* This function wakes up a domain, i.e. moves them into the waitqueue
*/
static inline void unblock_short_vcons
(struct sedf_edom_info* inf, s_time_t now) {
- inf->deadl_abs += inf->period;
- inf->cputime = 0;
+ inf->deadl_abs += inf->period;
+ inf->cputime = 0;
}
static inline void unblock_short_cons(struct sedf_edom_info* inf, s_time_t now)
{
- /*treat blocked time as consumed by the domain*/
- inf->cputime += now - inf->block_abs;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
- /*we don't have a reasonable amount of time in
- our slice left :( => start in next period!*/
- unblock_short_vcons(inf, now);
- }
+ /*treat blocked time as consumed by the domain*/
+ inf->cputime += now - inf->block_abs;
+ if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ /*we don't have a reasonable amount of time in
+ our slice left :( => start in next period!*/
+ unblock_short_vcons(inf, now);
+ }
#ifdef SEDF_STATS
- else
- inf->short_cont++;
+ else
+ inf->short_cont++;
#endif
}
static inline void unblock_short_extra_support (struct sedf_edom_info* inf,
- s_time_t now) {
- /*this unblocking scheme tries to support the domain, by assigning it
- a priority in extratime distribution according to the loss of time
- in this slice due to blocking*/
- s_time_t pen;
-
- /*no more realtime execution in this period!*/
- inf->deadl_abs += inf->period;
- if (likely(inf->block_abs)) {
- //treat blocked time as consumed by the domain*/
- /*inf->cputime += now - inf->block_abs;*/
- /*penalty is time the domain would have
- had if it continued to run */
- pen = (inf->slice - inf->cputime);
- if (pen < 0) pen = 0;
- /*accumulate all penalties over the periods*/
- /*inf->short_block_lost_tot += pen;*/
- /*set penalty to the current value*/
- inf->short_block_lost_tot = pen;
- /*not sure which one is better.. but seems to work well...*/
-
- if (inf->short_block_lost_tot) {
- inf->score[0] = (inf->period << 10) /
- inf->short_block_lost_tot;
+ s_time_t now) {
+ /*this unblocking scheme tries to support the domain, by assigning it
+ a priority in extratime distribution according to the loss of time
+ in this slice due to blocking*/
+ s_time_t pen;
+
+ /*no more realtime execution in this period!*/
+ inf->deadl_abs += inf->period;
+ if (likely(inf->block_abs)) {
+                /*treat blocked time as consumed by the domain*/
+ /*inf->cputime += now - inf->block_abs;*/
+ /*penalty is time the domain would have
+ had if it continued to run */
+ pen = (inf->slice - inf->cputime);
+ if (pen < 0) pen = 0;
+ /*accumulate all penalties over the periods*/
+ /*inf->short_block_lost_tot += pen;*/
+ /*set penalty to the current value*/
+ inf->short_block_lost_tot = pen;
+ /*not sure which one is better.. but seems to work well...*/
+
+ if (inf->short_block_lost_tot) {
+ inf->score[0] = (inf->period << 10) /
+ inf->short_block_lost_tot;
#ifdef SEDF_STATS
- inf->pen_extra_blocks++;
+ inf->pen_extra_blocks++;
#endif
- if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
- /*remove domain for possible resorting!*/
- extraq_del(inf->exec_domain, EXTRA_PEN_Q);
- else
- /*remember that we want to be on the penalty q
- so that we can continue when we (un-)block
- in penalty-extratime*/
- inf->status |= EXTRA_WANT_PEN_Q;
-
- /*(re-)add domain to the penalty extraq*/
- extraq_add_sort_update(inf->exec_domain,
- EXTRA_PEN_Q, 0);
- }
- }
- /*give it a fresh slice in the next period!*/
- inf->cputime = 0;
+ if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
+ /*remove domain for possible resorting!*/
+ extraq_del(inf->exec_domain, EXTRA_PEN_Q);
+ else
+ /*remember that we want to be on the penalty q
+ so that we can continue when we (un-)block
+ in penalty-extratime*/
+ inf->status |= EXTRA_WANT_PEN_Q;
+
+ /*(re-)add domain to the penalty extraq*/
+ extraq_add_sort_update(inf->exec_domain,
+ EXTRA_PEN_Q, 0);
+ }
+ }
+ /*give it a fresh slice in the next period!*/
+ inf->cputime = 0;
}
static inline void unblock_long_vcons(struct sedf_edom_info* inf, s_time_t now)
{
- /* align to next future period */
- inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
- * inf->period;
- inf->cputime = 0;
+ /* align to next future period */
+ inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
+ * inf->period;
+ inf->cputime = 0;
}
static inline void unblock_long_cons_a (struct sedf_edom_info* inf,
- s_time_t now) {
- /*treat the time the domain was blocked in the
- CURRENT period as consumed by the domain*/
- inf->cputime = (now - inf->deadl_abs) % inf->period;
- if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
- /*we don't have a reasonable amount of time in our slice
- left :( => start in next period!*/
- unblock_long_vcons(inf, now);
- }
+ s_time_t now) {
+ /*treat the time the domain was blocked in the
+ CURRENT period as consumed by the domain*/
+ inf->cputime = (now - inf->deadl_abs) % inf->period;
+ if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ /*we don't have a reasonable amount of time in our slice
+ left :( => start in next period!*/
+ unblock_long_vcons(inf, now);
+ }
}
static inline void unblock_long_cons_b(struct sedf_edom_info* inf,s_time_t now) {
- /*Conservative 2b*/
- /*Treat the unblocking time as a start of a new period */
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
+ /*Conservative 2b*/
+ /*Treat the unblocking time as a start of a new period */
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
}
static inline void unblock_long_cons_c(struct sedf_edom_info* inf,s_time_t now) {
- if (likely(inf->latency)) {
- /*scale the slice and period accordingly to the latency hint*/
- /*reduce period temporarily to the latency hint*/
- inf->period = inf->latency;
- /*this results in max. 4s slice/period length*/
- ASSERT((inf->period < ULONG_MAX)
- && (inf->slice_orig < ULONG_MAX));
- /*scale slice accordingly, so that utilisation stays the same*/
- inf->slice = (inf->period * inf->slice_orig)
- / inf->period_orig;
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
- }
- else {
- /*we don't have a latency hint.. use some other technique*/
- unblock_long_cons_b(inf, now);
- }
+ if (likely(inf->latency)) {
+ /*scale the slice and period accordingly to the latency hint*/
+ /*reduce period temporarily to the latency hint*/
+ inf->period = inf->latency;
+ /*this results in max. 4s slice/period length*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
+ }
+ else {
+ /*we don't have a latency hint.. use some other technique*/
+ unblock_long_cons_b(inf, now);
+ }
}
/*a new idea of dealing with short blocks: burst period scaling*/
static inline void unblock_short_burst(struct sedf_edom_info* inf, s_time_t now)
{
- /*treat blocked time as consumed by the domain*/
- inf->cputime += now - inf->block_abs;
-
- if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
- /*if we can still use some time in the current slice
- then use it!*/
+ /*treat blocked time as consumed by the domain*/
+ inf->cputime += now - inf->block_abs;
+
+ if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+ /*if we can still use some time in the current slice
+ then use it!*/
#ifdef SEDF_STATS
- /*we let the domain run in the current period*/
- inf->short_cont++;
+ /*we let the domain run in the current period*/
+ inf->short_cont++;
#endif
- }
- else {
- /*we don't have a reasonable amount of time in
- our slice left => switch to burst mode*/
- if (likely(inf->unblock_abs)) {
- /*set the period-length to the current blocking
- interval, possible enhancements: average over last
- blocking intervals, user-specified minimum,...*/
- inf->period = now - inf->unblock_abs;
- /*check for overflow on multiplication*/
- ASSERT((inf->period < ULONG_MAX)
- && (inf->slice_orig < ULONG_MAX));
- /*scale slice accordingly, so that utilisation
- stays the same*/
- inf->slice = (inf->period * inf->slice_orig)
- / inf->period_orig;
- /*set new (shorter) deadline*/
- inf->deadl_abs += inf->period;
- }
- else {
- /*in case we haven't unblocked before
- start in next period!*/
- inf->cputime=0;
- inf->deadl_abs += inf->period;
- }
- }
- inf->unblock_abs = now;
+ }
+ else {
+ /*we don't have a reasonable amount of time in
+ our slice left => switch to burst mode*/
+ if (likely(inf->unblock_abs)) {
+ /*set the period-length to the current blocking
+ interval, possible enhancements: average over last
+ blocking intervals, user-specified minimum,...*/
+ inf->period = now - inf->unblock_abs;
+ /*check for overflow on multiplication*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation
+ stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ /*set new (shorter) deadline*/
+ inf->deadl_abs += inf->period;
+ }
+ else {
+ /*in case we haven't unblocked before
+ start in next period!*/
+ inf->cputime=0;
+ inf->deadl_abs += inf->period;
+ }
+ }
+ inf->unblock_abs = now;
}
static inline void unblock_long_burst(struct sedf_edom_info* inf, s_time_t now) {
- if (unlikely(inf->latency && (inf->period > inf->latency))) {
- /*scale the slice and period accordingly to the latency hint*/
- inf->period = inf->latency;
- /*check for overflows on multiplication*/
- ASSERT((inf->period < ULONG_MAX)
- && (inf->slice_orig < ULONG_MAX));
- /*scale slice accordingly, so that utilisation stays the same*/
- inf->slice = (inf->period * inf->slice_orig)
- / inf->period_orig;
- inf->deadl_abs = now + inf->period;
- inf->cputime = 0;
- }
- else {
- /*we don't have a latency hint.. or we are currently in
- "burst mode": use some other technique
- NB: this should be in fact the normal way of operation,
- when we are in sync with the device!*/
- unblock_long_cons_b(inf, now);
- }
- inf->unblock_abs = now;
+ if (unlikely(inf->latency && (inf->period > inf->latency))) {
+ /*scale the slice and period accordingly to the latency hint*/
+ inf->period = inf->latency;
+ /*check for overflows on multiplication*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
+ }
+ else {
+ /*we don't have a latency hint.. or we are currently in
+ "burst mode": use some other technique
+ NB: this should be in fact the normal way of operation,
+ when we are in sync with the device!*/
+ unblock_long_cons_b(inf, now);
+ }
+ inf->unblock_abs = now;
}
-#define DOMAIN_EDF 1
-#define DOMAIN_EXTRA_PEN 2
-#define DOMAIN_EXTRA_UTIL 3
-#define DOMAIN_IDLE 4
+#define DOMAIN_EDF 1
+#define DOMAIN_EXTRA_PEN 2
+#define DOMAIN_EXTRA_UTIL 3
+#define DOMAIN_IDLE 4
static inline int get_run_type(struct exec_domain* d) {
- struct sedf_edom_info* inf = EDOM_INFO(d);
- if (is_idle_task(d->domain))
- return DOMAIN_IDLE;
- if (inf->status & EXTRA_RUN_PEN)
- return DOMAIN_EXTRA_PEN;
- if (inf->status & EXTRA_RUN_UTIL)
- return DOMAIN_EXTRA_UTIL;
- return DOMAIN_EDF;
+ struct sedf_edom_info* inf = EDOM_INFO(d);
+ if (is_idle_task(d->domain))
+ return DOMAIN_IDLE;
+ if (inf->status & EXTRA_RUN_PEN)
+ return DOMAIN_EXTRA_PEN;
+ if (inf->status & EXTRA_RUN_UTIL)
+ return DOMAIN_EXTRA_UTIL;
+ return DOMAIN_EDF;
}
/*Compares two domains in the relation of whether the one is allowed to
interrupt the others execution.
It returns true (!=0) if a switch to the other domain is good.
Current Priority scheme is as follows:
- EDF > L0 (penalty based) extra-time >
- L1 (utilization) extra-time > idle-domain
+ EDF > L0 (penalty based) extra-time >
+ L1 (utilization) extra-time > idle-domain
In the same class priorities are assigned as following:
- EDF: early deadline > late deadline
- L0 extra-time: lower score > higher score*/
+ EDF: early deadline > late deadline
+ L0 extra-time: lower score > higher score*/
static inline int should_switch(struct exec_domain* cur,
- struct exec_domain* other, s_time_t now) {
- struct sedf_edom_info *cur_inf, *other_inf;
- cur_inf = EDOM_INFO(cur);
- other_inf = EDOM_INFO(other);
-
- /*check whether we need to make an earlier sched-decision*/
- if ((PERIOD_BEGIN(other_inf) <
- schedule_data[other->processor].s_timer.expires))
- return 1;
- /*no timing-based switches need to be taken into account here*/
- switch (get_run_type(cur)) {
- case DOMAIN_EDF:
- /* do not interrupt a running EDF domain */
- return 0;
- case DOMAIN_EXTRA_PEN:
- /*check whether we also want
- the L0 ex-q with lower score*/
- if ((other_inf->status & EXTRA_WANT_PEN_Q)
- && (other_inf->score[EXTRA_PEN_Q] <
- cur_inf->score[EXTRA_PEN_Q]))
- return 1;
- else return 0;
- case DOMAIN_EXTRA_UTIL:
- /*check whether we want the L0 extraq, don't
- switch if both domains want L1 extraq */
- if (other_inf->status & EXTRA_WANT_PEN_Q)
- return 1;
- else return 0;
- case DOMAIN_IDLE:
- return 1;
- }
- return 1;
+ struct exec_domain* other, s_time_t now) {
+ struct sedf_edom_info *cur_inf, *other_inf;
+ cur_inf = EDOM_INFO(cur);
+ other_inf = EDOM_INFO(other);
+
+ /*check whether we need to make an earlier sched-decision*/
+ if ((PERIOD_BEGIN(other_inf) <
+ schedule_data[other->processor].s_timer.expires))
+ return 1;
+ /*no timing-based switches need to be taken into account here*/
+ switch (get_run_type(cur)) {
+ case DOMAIN_EDF:
+ /* do not interrupt a running EDF domain */
+ return 0;
+ case DOMAIN_EXTRA_PEN:
+ /*check whether we also want
+ the L0 ex-q with lower score*/
+ if ((other_inf->status & EXTRA_WANT_PEN_Q)
+ && (other_inf->score[EXTRA_PEN_Q] <
+ cur_inf->score[EXTRA_PEN_Q]))
+ return 1;
+ else return 0;
+ case DOMAIN_EXTRA_UTIL:
+ /*check whether we want the L0 extraq, don't
+ switch if both domains want L1 extraq */
+ if (other_inf->status & EXTRA_WANT_PEN_Q)
+ return 1;
+ else return 0;
+ case DOMAIN_IDLE:
+ return 1;
+ }
+ return 1;
}
void sedf_wake(struct exec_domain *d) {
- s_time_t now = NOW();
- struct sedf_edom_info* inf = EDOM_INFO(d);
-
- PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
-
- if (unlikely(is_idle_task(d->domain)))
- return;
-
- if ( unlikely(__task_on_queue(d)) ) {
- PRINT(3,"\tdomain %i.%i is already in some queue\n",
- d->domain->domain_id, d->vcpu_id);
- return;
- }
- ASSERT(!sedf_runnable(d));
- inf->status &= ~SEDF_ASLEEP;
- ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
- ASSERT(!extraq_on(d, EXTRA_PEN_Q));
-
- if (unlikely(inf->deadl_abs == 0))
- /*initial setup of the deadline*/
- inf->deadl_abs = now + inf->slice;
-
- PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
- inf->period, now);
-#ifdef SEDF_STATS
- inf->block_tot++;
+ s_time_t now = NOW();
+ struct sedf_edom_info* inf = EDOM_INFO(d);
+
+ PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+
+ if (unlikely(is_idle_task(d->domain)))
+ return;
+
+ if ( unlikely(__task_on_queue(d)) ) {
+ PRINT(3,"\tdomain %i.%i is already in some queue\n",
+ d->domain->domain_id, d->vcpu_id);
+ return;
+ }
+ ASSERT(!sedf_runnable(d));
+ inf->status &= ~SEDF_ASLEEP;
+ ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
+ ASSERT(!extraq_on(d, EXTRA_PEN_Q));
+
+ if (unlikely(inf->deadl_abs == 0))
+ /*initial setup of the deadline*/
+ inf->deadl_abs = now + inf->slice;
+
+ PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+ "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+ inf->period, now);
+#ifdef SEDF_STATS
+ inf->block_tot++;
+#endif
+ if (unlikely(now < PERIOD_BEGIN(inf))) {
+ PRINT(4,"extratime unblock\n");
+ /* unblocking in extra-time! */
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (inf->status & EXTRA_WANT_PEN_Q) {
+ /*we have a domain that wants compensation
+ for block penalty and did just block in
+ its compensation time. Give it another
+ chance!*/
+ extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+ }
#endif
- if (unlikely(now < PERIOD_BEGIN(inf))) {
- PRINT(4,"extratime unblock\n");
- /* unblocking in extra-time! */
- #if (EXTRA == EXTRA_BLOCK_WEIGHT)
- if (inf->status & EXTRA_WANT_PEN_Q) {
- /*we have a domain that wants compensation
- for block penalty and did just block in
- its compensation time. Give it another
- chance!*/
- extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
- }
- #endif
- extraq_check_add_unblocked(d, 0);
- }
- else {
- if (now < inf->deadl_abs) {
- PRINT(4,"short unblocking\n");
- /*short blocking*/
+ extraq_check_add_unblocked(d, 0);
+ }
+ else {
+ if (now < inf->deadl_abs) {
+ PRINT(4,"short unblocking\n");
+ /*short blocking*/
#ifdef SEDF_STATS
- inf->short_block_tot++;
+ inf->short_block_tot++;
+#endif
+#if (UNBLOCK <= UNBLOCK_ATROPOS)
+ unblock_short_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+ unblock_short_cons(inf, now);
+#elif (UNBLOCK == UNBLOCK_BURST)
+ unblock_short_burst(inf, now);
+#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+ unblock_short_extra_support(inf, now);
#endif
- #if (UNBLOCK <= UNBLOCK_ATROPOS)
- unblock_short_vcons(inf, now);
- #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
- unblock_short_cons(inf, now);
- #elif (UNBLOCK == UNBLOCK_BURST)
- unblock_short_burst(inf, now);
- #elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
- unblock_short_extra_support(inf, now);
- #endif
- extraq_check_add_unblocked(d, 1);
- }
- else {
- PRINT(4,"long unblocking\n");
- /*long unblocking*/
+ extraq_check_add_unblocked(d, 1);
+ }
+ else {
+ PRINT(4,"long unblocking\n");
+ /*long unblocking*/
#ifdef SEDF_STATS
- inf->long_block_tot++;
+ inf->long_block_tot++;
+#endif
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+ unblock_long_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_EDF \
+ || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+ unblock_long_cons_b(inf, now);
+#elif (UNBLOCK == UNBLOCK_ATROPOS)
+ unblock_long_cons_c(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+ unblock_long_cons_b(inf, now);
+ /*unblock_short_cons_c(inf, now);*/
+#elif (UNBLOCK == UNBLOCK_BURST)
+ unblock_long_burst(inf, now);
#endif
- #if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
- unblock_long_vcons(inf, now);
- #elif (UNBLOCK == UNBLOCK_EDF \
- || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
- unblock_long_cons_b(inf, now);
- #elif (UNBLOCK == UNBLOCK_ATROPOS)
- unblock_long_cons_c(inf, now);
- #elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
- unblock_long_cons_b(inf, now);
- /*unblock_short_cons_c(inf, now);*/
- #elif (UNBLOCK == UNBLOCK_BURST)
- unblock_long_burst(inf, now);
- #endif
- extraq_check_add_unblocked(d, 1);
- }
- }
- PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
- "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
- inf->period, now);
- if (PERIOD_BEGIN(inf) > now) {
- __add_to_waitqueue_sort(d);
- PRINT(3,"added to waitq\n");
- }
- else {
- __add_to_runqueue_sort(d);
- PRINT(3,"added to runq\n");
- }
-
+ extraq_check_add_unblocked(d, 1);
+ }
+ }
+ PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+ "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+ inf->period, now);
+ if (PERIOD_BEGIN(inf) > now) {
+ __add_to_waitqueue_sort(d);
+ PRINT(3,"added to waitq\n");
+ }
+ else {
+ __add_to_runqueue_sort(d);
+ PRINT(3,"added to runq\n");
+ }
+
#ifdef SEDF_STATS
- /*do some statistics here...*/
- if (inf->block_abs != 0) {
- inf->block_time_tot += now - inf->block_abs;
- inf->penalty_time_tot +=
- PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
- }
-#endif
- /*sanity check: make sure each extra-aware domain IS on the util-q!*/
- ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
- ASSERT(__task_on_queue(d));
- /*check whether the awakened task needs to invoke the do_schedule
- routine. Try to avoid unnecessary runs but:
- Save approximation: Always switch to scheduler!*/
- if (should_switch(schedule_data[d->processor].curr, d, now)){
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(d->processor);
+ /*do some statistics here...*/
+ if (inf->block_abs != 0) {
+ inf->block_time_tot += now - inf->block_abs;
+ inf->penalty_time_tot +=
+ PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
+ }
#endif
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
- }
+ /*sanity check: make sure each extra-aware domain IS on the util-q!*/
+ ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
+ ASSERT(__task_on_queue(d));
+ /*check whether the awakened task needs to invoke the do_schedule
+ routine. Try to avoid unnecessary runs but:
+ Safe approximation: Always switch to scheduler!*/
+ if (should_switch(schedule_data[d->processor].curr, d, now))
+ cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
/*Print a lot of use-{full, less} information about a domains in the system*/
static void sedf_dump_domain(struct exec_domain *d) {
- printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
- test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
- printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
- EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
- EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
- (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
- EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
- if (d->cpu_time !=0)
- printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
- / d->cpu_time);
+ printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
+ test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
+ EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
+ EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+ (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
+ EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+ if (d->cpu_time !=0)
+ printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
+ / d->cpu_time);
#ifdef SEDF_STATS
- if (EDOM_INFO(d)->block_time_tot!=0)
- printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
- EDOM_INFO(d)->block_time_tot);
- if (EDOM_INFO(d)->block_tot!=0)
- printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
- "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
- EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
- (EDOM_INFO(d)->short_block_tot * 100)
- / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
- (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
- EDOM_INFO(d)->pen_extra_blocks,
- EDOM_INFO(d)->pen_extra_slices,
- EDOM_INFO(d)->long_block_tot,
- (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
- (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+ if (EDOM_INFO(d)->block_time_tot!=0)
+ printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
+ EDOM_INFO(d)->block_time_tot);
+ if (EDOM_INFO(d)->block_tot!=0)
+ printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
+ "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+ EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
+ (EDOM_INFO(d)->short_block_tot * 100)
+ / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
+ (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
+ EDOM_INFO(d)->pen_extra_blocks,
+ EDOM_INFO(d)->pen_extra_slices,
+ EDOM_INFO(d)->long_block_tot,
+ (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
+ (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
+ (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
#endif
- printf("\n");
+ printf("\n");
}
/*dumps all domains on hte specified cpu*/
static void sedf_dump_cpu_state(int i)
{
- struct list_head *list, *queue, *tmp;
- struct sedf_edom_info *d_inf;
- struct domain *d;
- struct exec_domain *ed;
- int loop = 0;
-
- printk("now=%"PRIu64"\n",NOW());
- queue = RUNQ(i);
- printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
- printk("%3d: ",loop++);
- d_inf = list_entry(list, struct sedf_edom_info, list);
- sedf_dump_domain(d_inf->exec_domain);
- }
-
- queue = WAITQ(i); loop = 0;
- printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
- printk("%3d: ",loop++);
- d_inf = list_entry(list, struct sedf_edom_info, list);
- sedf_dump_domain(d_inf->exec_domain);
- }
-
- queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
- printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
- d_inf = list_entry(list, struct sedf_edom_info,
- extralist[EXTRA_PEN_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->exec_domain);
- }
-
- queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
- printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
- (unsigned long)queue, (unsigned long) queue->next,
- (unsigned long) queue->prev);
- list_for_each_safe ( list, tmp, queue ) {
- d_inf = list_entry(list, struct sedf_edom_info,
- extralist[EXTRA_UTIL_Q]);
- printk("%3d: ",loop++);
- sedf_dump_domain(d_inf->exec_domain);
- }
-
- loop = 0;
- printk("\nnot on Q\n");
- for_each_domain(d)
- for_each_exec_domain(d, ed)
- {
- if (!__task_on_queue(ed) && (ed->processor == i)) {
- printk("%3d: ",loop++);
- sedf_dump_domain(ed);
- }
- }
+ struct list_head *list, *queue, *tmp;
+ struct sedf_edom_info *d_inf;
+ struct domain *d;
+ struct exec_domain *ed;
+ int loop = 0;
+
+ printk("now=%"PRIu64"\n",NOW());
+ queue = RUNQ(i);
+ printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ printk("%3d: ",loop++);
+ d_inf = list_entry(list, struct sedf_edom_info, list);
+ sedf_dump_domain(d_inf->exec_domain);
+ }
+
+ queue = WAITQ(i); loop = 0;
+ printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ printk("%3d: ",loop++);
+ d_inf = list_entry(list, struct sedf_edom_info, list);
+ sedf_dump_domain(d_inf->exec_domain);
+ }
+
+ queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
+ printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
+ (unsigned long)queue, (unsigned long) queue->next,
+ (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ d_inf = list_entry(list, struct sedf_edom_info,
+ extralist[EXTRA_PEN_Q]);
+ printk("%3d: ",loop++);
+ sedf_dump_domain(d_inf->exec_domain);
+ }
+
+ queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
+ printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
+ (unsigned long)queue, (unsigned long) queue->next,
+ (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ d_inf = list_entry(list, struct sedf_edom_info,
+ extralist[EXTRA_UTIL_Q]);
+ printk("%3d: ",loop++);
+ sedf_dump_domain(d_inf->exec_domain);
+ }
+
+ loop = 0;
+ printk("\nnot on Q\n");
+ for_each_domain(d)
+ for_each_exec_domain(d, ed)
+ {
+ if (!__task_on_queue(ed) && (ed->processor == i)) {
+ printk("%3d: ",loop++);
+ sedf_dump_domain(ed);
+ }
+ }
}
/*Adjusts periods and slices of the domains accordingly to their weights*/
static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
- struct exec_domain *p;
- struct domain *d;
- int sumw[NR_CPUS];
- s_time_t sumt[NR_CPUS];
- int cpu;
-
- for (cpu=0; cpu < NR_CPUS; cpu++) {
- sumw[cpu] = 0;
- sumt[cpu] = 0;
- }
- /*sum up all weights*/
- for_each_domain(d)
- for_each_exec_domain(d, p) {
- if (EDOM_INFO(p)->weight)
- sumw[p->processor] += EDOM_INFO(p)->weight;
- else {
- /*don't modify domains who don't have a weight, but sum
- up the time they need, projected to a WEIGHT_PERIOD,
- so that this time is not given to the weight-driven
- domains*/
- /*check for overflows*/
- ASSERT((WEIGHT_PERIOD < ULONG_MAX)
- && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
- sumt[p->processor] += (WEIGHT_PERIOD *
- EDOM_INFO(p)->slice_orig) / EDOM_INFO(p)->period_orig;
- }
- }
- /*adjust all slices (and periods) to the new weight*/
- for_each_domain(d)
- for_each_exec_domain(d, p) {
- if (EDOM_INFO(p)->weight) {
- EDOM_INFO(p)->period_orig =
- EDOM_INFO(p)->period = WEIGHT_PERIOD;
- EDOM_INFO(p)->slice_orig =
- EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight *
- (WEIGHT_PERIOD -WEIGHT_SAFETY -
- sumt[p->processor])) / sumw[p->processor];
- }
- }
- return 0;
+ struct exec_domain *p;
+ struct domain *d;
+ int sumw[NR_CPUS];
+ s_time_t sumt[NR_CPUS];
+ int cpu;
+
+ for (cpu=0; cpu < NR_CPUS; cpu++) {
+ sumw[cpu] = 0;
+ sumt[cpu] = 0;
+ }
+ /*sum up all weights*/
+ for_each_domain(d)
+ for_each_exec_domain(d, p) {
+ if (EDOM_INFO(p)->weight)
+ sumw[p->processor] += EDOM_INFO(p)->weight;
+ else {
+ /*don't modify domains that don't have a weight, but sum
+ up the time they need, projected to a WEIGHT_PERIOD,
+ so that this time is not given to the weight-driven
+ domains*/
+ /*check for overflows*/
+ ASSERT((WEIGHT_PERIOD < ULONG_MAX)
+ && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+ sumt[p->processor] +=
+ (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
+ EDOM_INFO(p)->period_orig;
+ }
+ }
+ /*adjust all slices (and periods) to the new weight*/
+ for_each_domain(d)
+ for_each_exec_domain(d, p) {
+ if (EDOM_INFO(p)->weight) {
+ EDOM_INFO(p)->period_orig =
+ EDOM_INFO(p)->period = WEIGHT_PERIOD;
+ EDOM_INFO(p)->slice_orig =
+ EDOM_INFO(p)->slice =
+ (EDOM_INFO(p)->weight *
+ (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
+ sumw[p->processor];
+ }
+ }
+ return 0;
}
/* set or fetch domain scheduling parameters */
static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
- struct exec_domain *ed;
+ struct exec_domain *ed;
- PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
- "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
- p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
- cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
- if ( cmd->direction == SCHED_INFO_PUT )
- {
- /*check for sane parameters*/
- if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
- return -EINVAL;
- if (cmd->u.sedf.weight) {
- if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
- (! cmd->u.sedf.period)) {
- /*weight driven domains with xtime ONLY!*/
- for_each_exec_domain(p, ed) {
- EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
- EDOM_INFO(ed)->weight = 0;
- EDOM_INFO(ed)->slice = 0;
- EDOM_INFO(ed)->period = WEIGHT_PERIOD;
- }
- } else {
- /*weight driven domains with real-time execution*/
- for_each_exec_domain(p, ed)
- EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
- }
- }
- else {
- /*time driven domains*/
- for_each_exec_domain(p, ed) {
- /* sanity checking! */
- if(cmd->u.sedf.slice > cmd->u.sedf.period )
- return -EINVAL;
- EDOM_INFO(ed)->weight = 0;
- EDOM_INFO(ed)->extraweight = 0;
- EDOM_INFO(ed)->period_orig =
- EDOM_INFO(ed)->period = cmd->u.sedf.period;
- EDOM_INFO(ed)->slice_orig =
- EDOM_INFO(ed)->slice = cmd->u.sedf.slice;
- }
- }
- if (sedf_adjust_weights(cmd))
- return -EINVAL;
-
- for_each_exec_domain(p, ed) {
- EDOM_INFO(ed)->status = (EDOM_INFO(ed)->status &
- ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
- EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
- extraq_check(ed);
- }
- }
- else if ( cmd->direction == SCHED_INFO_GET )
- {
- cmd->u.sedf.period = EDOM_INFO(p->exec_domain[0])->period;
- cmd->u.sedf.slice = EDOM_INFO(p->exec_domain[0])->slice;
- cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
- & EXTRA_AWARE;
- cmd->u.sedf.latency = EDOM_INFO(p->exec_domain[0])->latency;
- cmd->u.sedf.weight = EDOM_INFO(p->exec_domain[0])->weight;
- }
- PRINT(2,"sedf_adjdom_finished\n");
- return 0;
+ PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
+ "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
+ p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
+ cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+ if ( cmd->direction == SCHED_INFO_PUT )
+ {
+ /*check for sane parameters*/
+ if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
+ return -EINVAL;
+ if (cmd->u.sedf.weight) {
+ if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
+ (! cmd->u.sedf.period)) {
+ /*weight driven domains with xtime ONLY!*/
+ for_each_exec_domain(p, ed) {
+ EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
+ EDOM_INFO(ed)->weight = 0;
+ EDOM_INFO(ed)->slice = 0;
+ EDOM_INFO(ed)->period = WEIGHT_PERIOD;
+ }
+ } else {
+ /*weight driven domains with real-time execution*/
+ for_each_exec_domain(p, ed)
+ EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
+ }
+ }
+ else {
+ /*time driven domains*/
+ for_each_exec_domain(p, ed) {
+ /* sanity checking! */
+ if(cmd->u.sedf.slice > cmd->u.sedf.period )
+ return -EINVAL;
+ EDOM_INFO(ed)->weight = 0;
+ EDOM_INFO(ed)->extraweight = 0;
+ EDOM_INFO(ed)->period_orig =
+ EDOM_INFO(ed)->period = cmd->u.sedf.period;
+ EDOM_INFO(ed)->slice_orig =
+ EDOM_INFO(ed)->slice = cmd->u.sedf.slice;
+ }
+ }
+ if (sedf_adjust_weights(cmd))
+ return -EINVAL;
+
+ for_each_exec_domain(p, ed) {
+ EDOM_INFO(ed)->status =
+ (EDOM_INFO(ed)->status &
+ ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
+ EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
+ extraq_check(ed);
+ }
+ }
+ else if ( cmd->direction == SCHED_INFO_GET )
+ {
+ cmd->u.sedf.period = EDOM_INFO(p->exec_domain[0])->period;
+ cmd->u.sedf.slice = EDOM_INFO(p->exec_domain[0])->slice;
+ cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
+ & EXTRA_AWARE;
+ cmd->u.sedf.latency = EDOM_INFO(p->exec_domain[0])->latency;
+ cmd->u.sedf.weight = EDOM_INFO(p->exec_domain[0])->weight;
+ }
+ PRINT(2,"sedf_adjdom_finished\n");
+ return 0;
}
struct scheduler sched_sedf_def = {
.opt_name = "sedf",
.sched_id = SCHED_SEDF,
- .init_idle_task = sedf_init_idle_task,
.alloc_task = sedf_alloc_task,
.add_task = sedf_add_task,
.free_task = sedf_free_task,
- .init_scheduler = sedf_init_scheduler,
.do_schedule = sedf_do_schedule,
.dump_cpu_state = sedf_dump_cpu_state,
.sleep = sedf_sleep,
static char opt_sched[10] = "bvt";
string_param("sched", opt_sched);
-/*#define WAKE_HISTO*/
-/*#define BLOCKTIME_HISTO*/
-/*#define ADV_SCHED_HISTO*/
-//#include <xen/adv_sched_hist.h>
-
#if defined(WAKE_HISTO)
#define BUCKETS 31
#elif defined(BLOCKTIME_HISTO)
xfree(d);
}
-struct exec_domain *alloc_exec_domain_struct(struct domain *d,
- unsigned long vcpu)
+struct exec_domain *alloc_exec_domain_struct(
+ struct domain *d, unsigned long vcpu)
{
struct exec_domain *ed, *edc;
edc->next_in_list = ed;
if (test_bit(_VCPUF_cpu_pinned, &edc->vcpu_flags)) {
- ed->processor = (edc->processor + 1) % smp_num_cpus;
+ ed->processor = (edc->processor + 1) % num_online_cpus();
set_bit(_VCPUF_cpu_pinned, &ed->vcpu_flags);
} else {
- ed->processor = (edc->processor + 1) % smp_num_cpus; /* XXX */
+ ed->processor = (edc->processor + 1) % num_online_cpus();
}
}
{
struct domain *d = ed->domain;
- /* Must be unpaused by control software to start execution. */
- set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
+ /* Initialise the per-domain timer. */
+ init_ac_timer(&ed->timer);
+ ed->timer.cpu = ed->processor;
+ ed->timer.data = (unsigned long)ed;
+ ed->timer.function = &dom_timer_fn;
- if ( d->domain_id != IDLE_DOMAIN_ID )
+ if ( is_idle_task(d) )
{
- /* Initialise the per-domain timer. */
- init_ac_timer(&ed->timer);
- ed->timer.cpu = ed->processor;
- ed->timer.data = (unsigned long)ed;
- ed->timer.function = &dom_timer_fn;
+ schedule_data[ed->processor].curr = ed;
+ schedule_data[ed->processor].idle = ed;
+ set_bit(_VCPUF_running, &ed->vcpu_flags);
}
else
{
- schedule_data[ed->processor].idle = ed;
+ /* Must be unpaused by control software to start execution. */
+ set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
}
SCHED_OP(add_task, ed);
TRACE_2D(TRC_SCHED_DOM_REM, ed->domain->domain_id, ed->vcpu_id);
}
-void init_idle_task(void)
-{
- if ( SCHED_OP(init_idle_task, current) < 0 )
- BUG();
-}
-
void domain_sleep(struct exec_domain *ed)
{
unsigned long flags;
{
struct exec_domain *ed = current;
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(current->processor);
-#endif
-
ed->vcpu_info->evtchn_upcall_mask = 0;
set_bit(_VCPUF_blocked, &ed->vcpu_flags);
/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(current->processor);
-#endif
-
TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
__enter_scheduler();
return 0;
spin_lock_irq(&schedule_data[cpu].schedule_lock);
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_from_stop(cpu);
-#endif
now = NOW();
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(cpu);
-#endif
rem_ac_timer(&schedule_data[cpu].s_timer);
next->lastschd = now;
/* reprogramm the timer */
- schedule_data[cpu].s_timer.expires = now + r_time;
+ schedule_data[cpu].s_timer.expires = now + r_time;
add_ac_timer(&schedule_data[cpu].s_timer);
/* Must be protected by the schedule_lock! */
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
- if ( unlikely(prev == next) ) {
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_to_stop(cpu);
-#endif
+ if ( unlikely(prev == next) )
return continue_running(prev);
- }
+
perfc_incrc(sched_ctx);
#if defined(WAKE_HISTO)
prev->domain->domain_id, prev->vcpu_id,
next->domain->domain_id, next->vcpu_id);
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_to_stop(cpu);
-#endif
-
context_switch(prev, next);
}
/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(unsigned long unused)
{
-#ifdef ADV_SCHED_HISTO
- adv_sched_hist_start(current->processor);
-#endif
-
raise_softirq(SCHEDULE_SOFTIRQ);
perfc_incrc(sched_irq);
}
for ( i = 0; i < NR_CPUS; i++ )
{
spin_lock_init(&schedule_data[i].schedule_lock);
- schedule_data[i].curr = &idle0_exec_domain;
-
+
init_ac_timer(&schedule_data[i].s_timer);
schedule_data[i].s_timer.cpu = i;
schedule_data[i].s_timer.data = 2;
t_timer[i].function = &t_timer_fn;
}
- schedule_data[0].idle = &idle0_exec_domain;
+ schedule_data[0].curr = idle_task[0];
+ schedule_data[0].idle = idle_task[0];
for ( i = 0; schedulers[i] != NULL; i++ )
{
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- if ( SCHED_OP(init_scheduler) < 0 )
- panic("Initialising scheduler failed!");
+ BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0);
+ sched_add_domain(idle_task[0]);
}
/*
*/
void schedulers_start(void)
{
- s_timer_fn(0);
- smp_call_function((void *)s_timer_fn, NULL, 1, 1);
-
t_timer_fn(0);
smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
-
void dump_runq(unsigned char key)
{
s_time_t now = NOW();
SCHED_OP(dump_settings);
printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu ( i )
{
spin_lock(&schedule_data[i].schedule_lock);
printk("CPU[%02d] ", i);
}
#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
+
void print_sched_histo(unsigned char key)
{
int i, j, k;
- for ( k = 0; k < smp_num_cpus; k++ )
+ for_each_online_cpu ( k )
{
j = 0;
printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
}
}
+
void reset_sched_histo(unsigned char key)
{
int i, j;
- for ( j = 0; j < smp_num_cpus; j++ )
+ for ( j = 0; j < NR_CPUS; j++ )
for ( i=0; i < BUCKETS; i++ )
schedule_data[j].hist[i] = 0;
}
+
#else
-#if defined(ADV_SCHED_HISTO)
-void print_sched_histo(unsigned char key)
-{
- int i, j, k,t;
- printf("Hello!\n");
- for ( k = 0; k < smp_num_cpus; k++ )
- {
- j = 0;
- t = 0;
- printf ("CPU[%02d]: scheduler latency histogram FROM (ms:[count])\n", k);
- for ( i = 0; i < BUCKETS; i++ )
- {
- //if ( schedule_data[k].hist[i] != 0 )
- {
- t += schedule_data[k].from_hist[i];
- if ( i < BUCKETS-1 )
- printk("%3d:[%7u] ", i, schedule_data[k].from_hist[i]);
- else
- printk(" >:[%7u] ", schedule_data[k].from_hist[i]);
- //if ( !(++j % 5) )
- printk("\n");
- }
- }
- printk("\nTotal: %i\n",t);
- }
- for ( k = 0; k < smp_num_cpus; k++ )
- {
- j = 0; t = 0;
- printf ("CPU[%02d]: scheduler latency histogram TO (ms:[count])\n", k);
- for ( i = 0; i < BUCKETS; i++ )
- {
- //if ( schedule_data[k].hist[i] != 0 )
- {
- t += schedule_data[k].from_hist[i];
- if ( i < BUCKETS-1 )
- printk("%3d:[%7u] ", i, schedule_data[k].to_hist[i]);
- else
- printk(" >:[%7u] ", schedule_data[k].to_hist[i]);
- //if ( !(++j % 5) )
- printk("\n");
- }
- }
- printk("\nTotal: %i\n",t);
- }
-
-}
-void reset_sched_histo(unsigned char key)
-{
- int i, j;
- for ( j = 0; j < smp_num_cpus; j++ ) {
- for ( i=0; i < BUCKETS; i++ )
- schedule_data[j].to_hist[i] = schedule_data[j].from_hist[i] = 0;
- schedule_data[j].save_tsc = 0;
- }
-}
-#else
+
void print_sched_histo(unsigned char key) { }
void reset_sched_histo(unsigned char key) { }
-#endif
+
#endif
/*
return;
}
- nr_pages = smp_num_cpus * opt_tbuf_size;
+ nr_pages = num_online_cpus() * opt_tbuf_size;
order = get_order(nr_pages * PAGE_SIZE);
if ( (rawbuf = (char *)alloc_xenheap_pages(order)) == NULL )
for ( i = 0; i < nr_pages; i++ )
SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf + i * PAGE_SIZE), dom0);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu ( i )
{
buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE];
#include <asm/asm-offsets.h>
#include <asm/processor.h>
+#ifndef STR
#define __STR(x) #x
#define STR(x) __STR(x)
+#endif
#ifdef __x86_64__
#include <asm/x86_64/asm_defns.h>
#include <xen/config.h>
+#ifndef STR
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#endif
+
/*
* These have to be done with inline assembly: that way the bit-setting
* is guaranteed to be atomic. All bit operations return 0 if the bit
/**
* find_first_zero_bit - find the first zero bit in a memory region
* @addr: The address to start the search at
- * @size: The maximum bitnumber to search
+ * @size: The maximum size to search
*
* Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit. -1 when none found.
+ * containing a bit.
*/
-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+static inline long find_first_zero_bit(
+ const unsigned long *addr, unsigned size)
{
- int d0, d1, d2;
- int res;
+ long d0, d1, d2;
+ long res;
- if (!size)
- return 0;
__asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
+ "mov $-1,%%"__OP"ax\n\t"
+ "xor %%edx,%%edx\n\t"
+ "repe; scas"__OS"\n\t"
"je 1f\n\t"
- "xorl -4(%%"__OP"di),%%eax\n\t"
- "sub"__OS" $4,%%"__OP"di\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t"
- "shl"__OS" $3,%%"__OP"di\n\t"
- "add"__OS" %%"__OP"di,%%"__OP"dx"
+ "lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
+ "xor (%%"__OP"di),%%"__OP"ax\n\t"
+ "bsf %%"__OP"ax,%%"__OP"dx\n"
+ "1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
+ "shl $3,%%"__OP"di\n\t"
+ "add %%"__OP"di,%%"__OP"dx"
:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
return res;
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
*/
-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
-{
- unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
- int set = 0, bit = offset & 31, res;
-
- if (bit) {
- /*
- * Look for zero in first byte
- */
- __asm__("bsfl %1,%0\n\t"
- "jne 1f\n\t"
- "movl $32, %0\n"
- "1:"
- : "=r" (set)
- : "r" (~(*p >> bit)));
- if (set < (32 - bit))
- return set + offset;
- set = 32 - bit;
- p++;
- }
- /*
- * No zero yet, search remaining full bytes for a zero
- */
- res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
- return (offset + set + res);
-}
+long find_next_zero_bit(const unsigned long *addr, int size, int offset);
/**
- * ffz - find first zero in word.
- * @word: The word to search
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
*
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
*/
-static __inline__ unsigned long ffz(unsigned long word)
+static inline long find_first_bit(
+ const unsigned long *addr, unsigned size)
{
- __asm__("bsf"__OS" %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
+ long d0, d1;
+ long res;
+
+ __asm__ __volatile__(
+ "xor %%eax,%%eax\n\t"
+ "repe; scas"__OS"\n\t"
+ "je 1f\n\t"
+ "lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
+ "bsf (%%"__OP"di),%%"__OP"ax\n"
+ "1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
+ "shl $3,%%"__OP"di\n\t"
+ "add %%"__OP"di,%%"__OP"ax"
+ :"=a" (res), "=&c" (d0), "=&D" (d1)
+ :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
+ return res;
}
/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
*/
-static __inline__ int ffs(int x)
-{
- int r;
+long find_next_bit(const unsigned long *addr, int size, int offset);
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "g" (x));
- return r+1;
+/* return index of first bit set in val or max when no bit is set */
+static inline unsigned long __scanbit(unsigned long val, unsigned long max)
+{
+ asm("bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max));
+ return val;
}
+#define find_first_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ (__scanbit(*(unsigned long *)addr,(size))) : \
+ find_first_bit(addr,size)))
+
+#define find_next_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \
+ find_next_bit(addr,size,off)))
+
+#define find_first_zero_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ (__scanbit(~*(unsigned long *)addr,(size))) : \
+ find_first_zero_bit(addr,size)))
+
+#define find_next_zero_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
+ find_next_zero_bit(addr,size,off)))
+
+
/*
* These are the preferred 'find first' functions in Xen.
* Both return the appropriate bit index, with the l.s.b. having index 0.
#ifndef __I386_DIV64
#define __I386_DIV64
+/*
+ * do_div() is NOT a C function. It wants to return
+ * two values (the quotient and the remainder), but
+ * since that doesn't work very well in C, what it
+ * does is:
+ *
+ * - modifies the 64-bit dividend _in_place_
+ * - returns the 32-bit remainder
+ *
+ * This ends up being the most efficient "calling
+ * convention" on x86.
+ */
#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod; \
+ unsigned long __upper, __low, __high, __mod, __base; \
+ __base = (base); \
asm("":"=a" (__low), "=d" (__high):"A" (n)); \
__upper = __high; \
if (__high) { \
- __upper = __high % (base); \
- __high = __high / (base); \
+ __upper = __high % (__base); \
+ __high = __high / (__base); \
} \
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
+ asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
asm("":"=A" (n):"a" (__low),"d" (__high)); \
__mod; \
})
+/*
+ * (long)X = ((long long)divs) / (long)div
+ * (long)rem = ((long long)divs) % (long)div
+ *
+ * Warning, this will do an exception if X overflows.
+ */
+#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
+
+extern inline long
+div_ll_X_l_rem(long long divs, long div, long *rem)
+{
+ long dum2;
+ __asm__("divl %2":"=a"(dum2), "=d"(*rem)
+ : "rm"(div), "A"(divs));
+
+ return dum2;
+
+}
#endif
#define local_flush_tlb_one(__addr) \
__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
-#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1)
+#define flush_tlb_all() flush_tlb_mask((1 << num_online_cpus()) - 1)
#ifndef CONFIG_SMP
#define flush_tlb_all_pge() local_flush_tlb_pge()
#define platform_legacy_irq(irq) ((irq) < 16)
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void send_IPI_self(int vector);
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
+void disable_8259A_irq(unsigned int irq);
+void enable_8259A_irq(unsigned int irq);
+int i8259A_irq_pending(unsigned int irq);
+void make_8259A_irq(unsigned int irq);
+void init_8259A(int aeoi);
+void send_IPI_self(int vector);
+void init_VISWS_APIC_irqs(void);
+void setup_IO_APIC(void);
+void disable_IO_APIC(void);
+void print_IO_APIC(void);
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+void send_IPI(int dest, int vector);
+void setup_ioapic_dest(void);
extern unsigned long io_apic_irqs;
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
-extern char _stext, _etext;
-
#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-#include <xen/irq.h>
-
+/*
+ * Re-trigger an IRQ by sending its vector to the local CPU as a
+ * self-IPI.  Only IO-APIC-routed interrupts are resent; the
+ * CONFIG_X86_IO_APIC guard is dropped here -- presumably this build
+ * always has the IO-APIC support compiled in (TODO: confirm).
+ */
static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
{
-#if defined(CONFIG_X86_IO_APIC)
if (IO_APIC_IRQ(i))
send_IPI_self(IO_APIC_VECTOR(i));
-#endif
}
#endif /* _ASM_HW_IRQ_H */
#define current_cpu_data boot_cpu_data
#endif
+extern int phys_proc_id[NR_CPUS];
extern char ignore_irq13;
extern void identify_cpu(struct cpuinfo_x86 *);
#include <xen/config.h>
#include <xen/lib.h>
#include <xen/types.h>
+#include <xen/bitops.h>
/*
* bitmaps provide bit arrays that consume one or more unsigned
+#ifndef __XEN_CPUMASK_H
+#define __XEN_CPUMASK_H
+
/*
- * XXX This to be replaced with the Linux file in the near future.
+ * Cpumasks provide a bitmap suitable for representing the
+ * set of CPU's in a system, one bit position per CPU number.
+ *
+ * See detailed comments in the file xen/bitmap.h describing the
+ * data type on which these cpumasks are based.
+ *
+ * For details of cpumask_scnprintf() and cpumask_parse(),
+ * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ *
+ * The available cpumask operations are:
+ *
+ * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
+ * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask
+ * void cpus_setall(mask) set all bits
+ * void cpus_clear(mask) clear all bits
+ * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask
+ * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask
+ *
+ * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection]
+ * void cpus_or(dst, src1, src2) dst = src1 | src2 [union]
+ * void cpus_xor(dst, src1, src2) dst = src1 ^ src2
+ * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
+ * void cpus_complement(dst, src) dst = ~src
+ *
+ * int cpus_equal(mask1, mask2) Does mask1 == mask2?
+ * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
+ * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2?
+ * int cpus_empty(mask) Is mask empty (no bits set)?
+ * int cpus_full(mask) Is mask full (all bits set)?
+ * int cpus_weight(mask) Hamming weight - number of set bits
+ *
+ * void cpus_shift_right(dst, src, n) Shift right
+ * void cpus_shift_left(dst, src, n) Shift left
+ *
+ * int first_cpu(mask) Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
+ *
+ * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
+ * CPU_MASK_ALL Initializer - all bits set
+ * CPU_MASK_NONE Initializer - no bits set
+ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask
+ *
+ * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
+ * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ *
+ * for_each_cpu_mask(cpu, mask) for-loop cpu over mask
+ *
+ * int num_online_cpus() Number of online CPUs
+ * int num_possible_cpus() Number of all possible CPUs
+ * int num_present_cpus() Number of present CPUs
+ *
+ * int cpu_online(cpu) Is some cpu online?
+ * int cpu_possible(cpu) Is some cpu possible?
+ * int cpu_present(cpu) Is some cpu present (can schedule)?
+ *
+ * int any_online_cpu(mask) First online cpu in mask
+ *
+ * for_each_cpu(cpu) for-loop cpu over cpu_possible_map
+ * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map
+ * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
+ * to generate slightly worse code. Note for example the additional
+ * 40 lines of assembly code compiling the "for each possible cpu"
+ * loops buried in the disk_stat_read() macros calls when compiling
+ * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple
+ * one-line #define for cpu_isset(), instead of wrapping an inline
+ * inside a macro, the way we do the other calls.
*/
-#ifndef __XEN_CPUMASK_H__
-#define __XEN_CPUMASK_H__
-
+#include <xen/config.h>
#include <xen/bitmap.h>
+#include <xen/kernel.h>
-typedef u32 cpumask_t;
+/*
+ * One bit per possible CPU; replaces the old single-u32 representation
+ * so NR_CPUS may exceed 32.
+ */
+typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+/* typeof() donor for cpumask_of_cpu() below. */
+extern cpumask_t _unused_cpumask_arg_;
+
+/*
+ * Single-bit cpumask operations: thin wrappers around the bitops
+ * (set_bit/clear_bit/test_and_set_bit) from <xen/bitops.h>.
+ */
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+ set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+ clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+ return test_and_set_bit(cpu, addr->bits);
+}
+
+/* Whole-mask logical operations, delegated to the generic bitmap library. */
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
+static inline void __cpus_complement(cpumask_t *dstp,
+ const cpumask_t *srcp, int nbits)
+{
+ bitmap_complement(dstp->bits, srcp->bits, nbits);
+}
+
+/* Whole-mask predicates and population count. */
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
+static inline int __cpus_full(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_full(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits);
+}
+
+/*
+ * Shifts and iteration primitives.  first_cpu()/next_cpu() return
+ * NR_CPUS when no (further) bit is set: min_t clamps the find_*_bit
+ * result to nbits (== NR_CPUS).
+ */
+#define cpus_shift_right(dst, src, n) \
+ __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_right(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
+static inline int __first_cpu(const cpumask_t *srcp, int nbits)
+{
+ return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
+}
+
+#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
+static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
+{
+ return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
+}
+
+/*
+ * Build a mask with only bit 'cpu' set.  When the whole mask fits in a
+ * single unsigned long (the sizeof test), a direct store to bits[0]
+ * initialises every bit; otherwise clear the mask first, then set.
+ */
+#define cpumask_of_cpu(cpu) \
+({ \
+ typeof(_unused_cpumask_arg_) m; \
+ if (sizeof(m) == sizeof(unsigned long)) { \
+ m.bits[0] = 1UL<<(cpu); \
+ } else { \
+ cpus_clear(m); \
+ cpu_set((cpu), m); \
+ } \
+ m; \
+})
+
+/*
+ * Compound-literal initialisers.  The final word uses
+ * CPU_MASK_LAST_WORD so bits at positions >= NR_CPUS stay clear.
+ */
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
-#ifndef cpu_online_map
-extern cpumask_t cpu_online_map;
#endif
-static inline int cpus_weight(cpumask_t w)
+#define CPU_MASK_NONE \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
+} }
+
+#define CPU_MASK_CPU0 \
+(cpumask_t) { { \
+ [0] = 1UL \
+} }
+
+#define cpus_addr(src) ((src).bits)
+
+/*
+ * NOTE(review): the scnprintf/parse wrappers are intentionally left
+ * commented out -- presumably until bitmap_scnprintf()/bitmap_parse()
+ * are ported; confirm before enabling.
+ */
+/*
+#define cpumask_scnprintf(buf, len, src) \
+ __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpumask_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
{
- unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
- res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
- res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
- res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
- return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+ return bitmap_scnprintf(buf, len, srcp->bits, nbits);
}
-#define cpus_addr(_m) (&(_m))
+#define cpumask_parse(ubuf, ulen, src) \
+ __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
+static inline int __cpumask_parse(const char __user *buf, int len,
+ cpumask_t *dstp, int nbits)
+{
+ return bitmap_parse(buf, len, dstp->bits, nbits);
+}
+*/
+
+/*
+ * Iterate 'cpu' over every set bit in 'mask'.  On UP the loop body
+ * runs exactly once with cpu == 0, ignoring 'mask'.
+ */
+#if NR_CPUS > 1
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = first_cpu(mask); \
+ (cpu) < NR_CPUS; \
+ (cpu) = next_cpu((cpu), (mask)))
+#else /* NR_CPUS == 1 */
+#define for_each_cpu_mask(cpu, mask) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#endif /* NR_CPUS */
+
+/*
+ * The following particular system cpumasks and operations manage
+ * possible, present and online cpus. Each of them is a fixed size
+ * bitmap of size NR_CPUS.
+ *
+ * #ifdef CONFIG_HOTPLUG_CPU
+ * cpu_possible_map - all NR_CPUS bits set
+ * cpu_present_map - has bit 'cpu' set iff cpu is populated
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #else
+ * cpu_possible_map - has bit 'cpu' set iff cpu is populated
+ * cpu_present_map - copy of cpu_possible_map
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #endif
+ *
+ * In either case, NR_CPUS is fixed at compile time, as the static
+ * size of these bitmaps. The cpu_possible_map is fixed at boot
+ * time, as the set of CPU id's that it is possible might ever
+ * be plugged in at anytime during the life of that system boot.
+ * The cpu_present_map is dynamic(*), representing which CPUs
+ * are currently plugged in. And cpu_online_map is the dynamic
+ * subset of cpu_present_map, indicating those CPUs available
+ * for scheduling.
+ *
+ * If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ * all NR_CPUS bits set, otherwise it is just the set of CPUs that
+ * ACPI reports present at boot.
+ *
+ * If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ * depending on what ACPI reports as currently plugged in, otherwise
+ * cpu_present_map is just a copy of cpu_possible_map.
+ *
+ * (*) Well, cpu_present_map is dynamic in the hotplug case. If not
+ * hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *
+ * Subtleties:
+ * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
+ * assumption that their single CPU is online. The UP
+ * cpu_{online,possible,present}_maps are placebos. Changing them
+ * will have no useful effect on the following num_*_cpus()
+ * and cpu_*() macros in the UP case. This ugliness is a UP
+ * optimization - don't waste any instructions or memory references
+ * asking if you're online or how many CPUs there are if there is
+ * only one CPU.
+ * 2) Most SMP arch's #define some of these maps to be some
+ * other map specific to that arch. Therefore, the following
+ * must be #define macros, not inlines. To see why, examine
+ * the assembly code produced by the following. Note that
+ * set1() writes phys_x_map, but set2() writes x_map:
+ * int x_map, phys_x_map;
+ * #define set1(a) x_map = a
+ * inline void set2(int a) { x_map = a; }
+ * #define x_map phys_x_map
+ * main(){ set1(3); set2(5); }
+ */
+
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_present_map;
+
+/*
+ * UP builds hardcode a single, always-online CPU 0; the map variables
+ * above are not consulted in that case.
+ */
+#if NR_CPUS > 1
+#define num_online_cpus() cpus_weight(cpu_online_map)
+#define num_possible_cpus() cpus_weight(cpu_possible_map)
+#define num_present_cpus() cpus_weight(cpu_present_map)
+#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
+#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
+#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
+#else
+#define num_online_cpus() 1
+#define num_possible_cpus() 1
+#define num_present_cpus() 1
+#define cpu_online(cpu) ((cpu) == 0)
+#define cpu_possible(cpu) ((cpu) == 0)
+#define cpu_present(cpu) ((cpu) == 0)
+#endif
+
+/*
+ * First online cpu in 'mask'; evaluates to NR_CPUS when the loop
+ * completes without finding one (next_cpu() clamps to NR_CPUS).
+ */
+#define any_online_cpu(mask) \
+({ \
+ int cpu; \
+ for_each_cpu_mask(cpu, (mask)) \
+ if (cpu_online(cpu)) \
+ break; \
+ cpu; \
+})
+
+#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map)
+#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
-#endif /* __XEN_CPUMASK_H__ */
+#endif /* __XEN_CPUMASK_H */
#define max_t(type,x,y) \
({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ *
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ * The typeof-checked intermediate pointer makes passing a 'ptr' of the
+ * wrong type draw a compiler warning.
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ * The &__dummy == &__dummy2 comparison warns when the types differ.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+
#endif /* _LINUX_KERNEL_H */
#ifndef __XEN_SCHED_IF_H__
#define __XEN_SCHED_IF_H__
-//#define ADV_SCHED_HISTO
#define BUCKETS 10
/*300*/
void *sched_priv;
struct ac_timer s_timer; /* scheduling timer */
unsigned long tick; /* current periodic 'tick' */
-#ifdef ADV_SCHED_HISTO
- u32 to_hist[BUCKETS];
- u32 from_hist[BUCKETS];
- u64 save_tsc;
-#endif
#ifdef BUCKETS
u32 hist[BUCKETS]; /* for scheduler latency histogram */
#endif
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
- int (*init_scheduler) (void);
- int (*init_idle_task) (struct exec_domain *);
int (*alloc_task) (struct exec_domain *);
void (*add_task) (struct exec_domain *);
void (*free_task) (struct domain *);
unsigned long start_stack,
unsigned long start_info);
-extern unsigned long wait_init_idle;
-#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
-
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
void schedulers_start(void);
long sched_ctl(struct sched_ctl_cmd *);
long sched_adjdom(struct sched_adjdom_cmd *);
int sched_id();
-void init_idle_task(void);
void domain_wake(struct exec_domain *d);
void domain_sleep(struct exec_domain *d);
#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
/*
- * Boot processor call to load the other CPU's
+ * Prepare machine for booting other CPUs.
*/
-extern void smp_boot_cpus(void);
+extern void smp_prepare_cpus(unsigned int max_cpus);
/*
- * Processor call in. Must hold processors until ..
+ * Bring a CPU up
*/
-extern void smp_callin(void);
+extern int __cpu_up(unsigned int cpunum);
/*
- * Multiprocessors may now schedule
+ * Final polishing of CPUs
*/
-extern void smp_commence(void);
+extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
return ret;
}
-/*
- * True once the per process idle is forked
- */
-extern int smp_threads_ready;
-
-extern int smp_num_cpus;
extern int ht_per_core;
extern int opt_noht;
#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/
#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
+/*
+ * Mark the boot cpu "online" so that it can call console drivers in
+ * printk() and can access its per-cpu storage.
+ */
+void smp_prepare_boot_cpu(void);
+
#else
/*
#define smp_send_event_check_mask(_m) ((void)0)
#define smp_send_event_check_cpu(_p) ((void)0)
-#define smp_num_cpus 1
+#ifndef __smp_processor_id
#define smp_processor_id() 0
+#endif
#define hard_smp_processor_id() 0
-#define smp_threads_ready 1
-#define kernel_lock()
-#define cpu_logical_map(cpu) 0
-#define cpu_number_map(cpu) 0
#define smp_call_function(func,info,retry,wait) 0
#define on_each_cpu(func,info,retry,wait) ({ func(info); 0; })
-#define cpu_online_map 1
+/* Uniprocessor stubs: exactly one booting CPU, nothing to prepare. */
+#define num_booting_cpus() 1
+#define smp_prepare_boot_cpu() do {} while (0)
#endif